Python 2.7.6 (default, Nov 10 2013, 19:24:24) [MSC v.1500 64 bit (AMD64)] on win32
Type "copyright", "credits" or "license()" for more information.
DreamPie 1.2.1
>>> import os
...
...
... os.chdir('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter\\')
>>> files = [f for f in os.listdir('.') if not os.path.isdir(f)]
>>> len(files)
0: 14
>>>
==================== New Session ====================
>>> import os
...
...
... os.chdir('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter\\')
Traceback (most recent call last):
File "<pyshell#0>", line 4, in <module>
os.chdir('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter\\')
WindowsError: [Error 2] The system cannot find the file specified: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter\\'
>>> import os
...
...
... os.chdir('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\')
>>> files = [f for f in os.listdir('.') if not os.path.isdir(f)]
>>> len(files)
0: 14
>>> for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter',
... domain,year,name[:50]+'.txt')
... oldpath = os.path.join('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter',
... f)
...
... print(newpath)
... print(oldpath)
...
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_201.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\entertainment.msn.com$$2010$$20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_ts.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\thenation.com$$2010$$20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_inte.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.forbes.com$$2010$$20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_t.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.forbes.com$$2010$$20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.forbes.com$$2010$$20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence .txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - Int.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - Internation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Mis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen t.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting B.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.foxnews.com$$2010$$United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to meg.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\wbce-BodyTextExtractorFilter\www.latimes.com$$2010$$Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
>>> for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter',
... domain,year,name+'.txt')
... oldpath = os.path.join('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\wbce-BodyTextExtractorFilter',
... f)
... os.rename(oldpath,newpath)
...
...
...
Traceback (most recent call last):
File "<pyshell#5>", line 7, in <module>
os.rename(oldpath,newpath)
WindowsError: [Error 3] The system cannot find the path specified
>>> for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter',
... domain,year,name+'.txt')
... oldpath = os.path.join('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter',
... f)
... os.rename(oldpath,newpath)
...
...
...
>>> import os
...
... os.chdir('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\ContentCodeBlurringFilter\\')
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
>>> import os
...
... os.chdir('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\ContentCodeBlurringFilter\\')
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
1: 20
>>> import os
...
...
... basedir = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\ContentCodeBlurringFilter'
... os.chdir(basedir)
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
...
...
... for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join(basedir,domain,year,name+'.txt')
... oldpath = os.path.join(basedir,f)
... os.rename(oldpath,newpath)
...
...
...
2: 20
>>> import os
...
...
... basedir = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\DocumentSlopeCurveFilter'
... os.chdir(basedir)
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
...
...
... for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join(basedir,domain,year,name+'.txt')
... oldpath = os.path.join(basedir,f)
... os.rename(oldpath,newpath)
...
...
...
3: 17
>>> import os
...
...
... basedir = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\FeatureExtractorDomFilter'
... os.chdir(basedir)
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
...
...
... for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join(basedir,domain,year,name+'.txt')
... oldpath = os.path.join(basedir,f)
... os.rename(oldpath,newpath)
...
...
...
4: 20
>>> import os
...
...
... basedir = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\FeatureExtractorSplitFilter'
... os.chdir(basedir)
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
...
...
... for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join(basedir,domain,year,name+'.txt')
... oldpath = os.path.join(basedir,f)
... os.rename(oldpath,newpath)
...
...
...
5: 21
>>> import os
...
...
... basedir = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\GeneralCCB'
... os.chdir(basedir)
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
...
...
... for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join(basedir,domain,year,name+'.txt')
... oldpath = os.path.join(basedir,f)
... os.rename(oldpath,newpath)
...
...
...
6: 5
>>> import os
...
...
... basedir = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\KFeatureExtractorDomFilter'
... os.chdir(basedir)
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
...
...
... for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join(basedir,domain,year,name+'.txt')
... oldpath = os.path.join(basedir,f)
... os.rename(oldpath,newpath)
...
...
...
7: 21
>>> import os
...
...
... basedir = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter'
... os.chdir(basedir)
...
... files = [f for f in os.listdir('.') if not os.path.isdir(f)]
...
... len(files)
...
...
... for f in files:
... domain,year,name = f.split('$$')
... newpath = os.path.join(basedir,domain,year,name+'.txt')
... oldpath = os.path.join(basedir,f)
... os.rename(oldpath,newpath)
...
...
...
8: 7
def _token_histogram(content):
    '''Return a collections.Counter of the word tokens found in content.

    A token is a maximal run of ``\\w`` characters.  ``re.findall(r'\\w+')``
    yields the same tokens as ``re.split(r'\\W+')`` except that it never
    produces the empty strings that ``split`` emits when the text starts or
    ends with a non-word character.
    '''
    # Imported locally: the session never imports these modules before the
    # functions are defined.
    import collections
    import re

    return collections.Counter(re.findall(r'\w+', content))


def tokens_to_hist_goldstd(test_filepath):
    '''Return a token histogram (collections.Counter) for a latin1 text file.

    test_filepath -- path of the file to read; it is made absolute before
                     opening and decoded as latin1.
    '''
    # io.open accepts ``encoding`` on Python 2 and Python 3 alike; the
    # Python 2 builtin open() rejects that keyword with a TypeError.
    import io

    with io.open(os.path.abspath(test_filepath), 'r', encoding="latin1") as f:
        content = f.read()

    return _token_histogram(content)


def tokens_to_hist_from_universe(data_filepath):
    '''Return a token histogram of every text node in an HTML file.

    data_filepath -- path of the HTML file; it is opened as utf8 text and
                     parsed with lxml.html using a utf-8 HTMLParser.

    Any exception raised while parsing is re-raised after the offending
    path has been printed.
    '''
    import io

    import lxml.html

    with io.open(data_filepath, 'r', encoding="utf8") as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # Report which file failed, then let the real error propagate
            # instead of continuing with ``parsed_goldhtml`` undefined.
            print("tokens_to_hist_from_universe: could not parse %s"
                  % data_filepath)
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))

    return _token_histogram(content)
>>> linkquotapath = 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter'
def listsubdir(directory):
    '''Return the paths of the immediate sub-directories of directory.

    directory -- either a single directory path (a string) or an iterable
                 of directory paths.  A bare string is treated as one path;
                 without that check it would be iterated character by
                 character and os.listdir would be called on 'c', ':', ...

    Each returned path is os.path.join(parent, entry), in the order that
    os.listdir reports the entries.  Because the result is a list of paths,
    calls can be nested: listsubdir(listsubdir(root)).
    '''
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # Python 3

    if isinstance(directory, string_types):
        directory = [directory]

    subdirs = []
    for parent in directory:
        for entry in os.listdir(parent):
            candidate = os.path.join(parent, entry)
            if os.path.isdir(candidate):
                subdirs.append(candidate)
    return subdirs
>>> listsubdir([linkquotapath])
9: ['c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\news.bbc.co.uk',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\news.yahoo.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\thenation.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\www.cnn.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\www.esquire.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\www.forbes.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\www.foxnews.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\www.latimes.com',
'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\www.nymag.com']
def prepare_for_measurements(testset_directory,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             pickle_output_name=None):
    '''Map every base file name in testset_directory to its gold/test paths.

    testset_directory  -- directory holding the gold standard and test
                          files; its last path component is reported as
                          'year' and the component above it as 'domain'.
    goldfile_ext       -- extension of the gold standard files.
    testfile_ext       -- extension of the test files.
    pickle_output_name -- accepted but not used by this function.

    Returns a dict keyed by base name (directory entry with a trailing
    goldfile_ext or testfile_ext removed); each value is a dict with the
    keys 'domain', 'year', 'goldpath' and 'testpath'.

    Only this first step (building the name -> paths dictionary) is done
    here; no histograms are computed.
    '''
    import re

    # Strip the extension only at the END of the name and escape the whole
    # extension, so "a.html.txt" becomes "a.html" rather than "a".
    ext_pattern = re.compile(
        "(?:" + re.escape(goldfile_ext) + "|" + re.escape(testfile_ext) + ")$")
    filenames = set(ext_pattern.sub("", name)
                    for name in os.listdir(testset_directory))

    # A set cannot be indexed; show the smallest name as a sample, and stay
    # silent when the directory is empty.
    if filenames:
        print("prep_data_for_measurements %s" % min(filenames))

    # normpath drops a trailing separator, which would otherwise make the
    # last component (the year) an empty string.
    parent_dir, year = os.path.split(os.path.normpath(testset_directory))
    domain = os.path.basename(parent_dir)

    golden_dict = {}
    for name in filenames:
        golden_dict[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(testset_directory, name + goldfile_ext),
            'testpath': os.path.join(testset_directory, name + testfile_ext),
        }

    return golden_dict
[About 29 more lines. Double-click to unfold]
>>> test_dir = "c:/crawlToTheFuture/crawl-to-the-future/dataset/"
>>> testset_folders = listsubdir(listsubdir(test_dir))
Traceback (most recent call last):
File "<pyshell#22>", line 1, in <module>
testset_folders = listsubdir(listsubdir(test_dir))
File "<pyshell#18>", line 3, in listsubdir
return [os.path.join(d, f) for d in directory for f in os.listdir(d)
WindowsError: [Error 3] The system cannot find the path specified: 'c/*.*'
[About 5 more lines. Double-click to unfold]
>>> testset_folders = listsubdir(listsubdir([test_dir]))
>>> testset_folders
10: ['c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2015']
[About 39 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0])
Traceback (most recent call last):
File "<pyshell#25>", line 1, in <module>
x = prepare_for_measurements(testset_folders[0])
File "<pyshell#20>", line 17, in prepare_for_measurements
for name in os.listdir(testset_directory) ])
NameError: global name 're' is not defined
[About 5 more lines. Double-click to unfold]
>>> import re
>>> x = prepare_for_measurements(testset_folders[0])
Traceback (most recent call last):
File "<pyshell#27>", line 1, in <module>
x = prepare_for_measurements(testset_folders[0])
File "<pyshell#20>", line 19, in prepare_for_measurements
print("prep_data_for_measurements",filenames[0])
TypeError: 'set' object does not support indexing
[About 5 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0])
Traceback (most recent call last):
File "<pyshell#28>", line 1, in <module>
x = prepare_for_measurements(testset_folders[0])
File "<pyshell#20>", line 19, in prepare_for_measurements
print("prep_data_for_measurements",filenames[0])
TypeError: 'set' object does not support indexing
[About 5 more lines. Double-click to unfold]
def prepare_for_measurements(testset_directory,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             pickle_output_name=None):
    '''Map every base file name in testset_directory to its gold/test paths.

    testset_directory  -- directory holding the gold standard and test
                          files; its last path component is reported as
                          'year' and the component above it as 'domain'.
    goldfile_ext       -- extension of the gold standard files.
    testfile_ext       -- extension of the test files.
    pickle_output_name -- accepted but not used by this function.

    Returns a dict keyed by base name (directory entry with a trailing
    goldfile_ext or testfile_ext removed); each value is a dict with the
    keys 'domain', 'year', 'goldpath' and 'testpath'.

    Only this first step (building the name -> paths dictionary) is done
    here; no histograms are computed.
    '''
    import re

    # Strip the extension only at the END of the name and escape the whole
    # extension, so "a.html.txt" becomes "a.html" rather than "a".
    ext_pattern = re.compile(
        "(?:" + re.escape(goldfile_ext) + "|" + re.escape(testfile_ext) + ")$")
    filenames = set(ext_pattern.sub("", name)
                    for name in os.listdir(testset_directory))

    # normpath drops a trailing separator, which would otherwise make the
    # last component (the year) an empty string.
    parent_dir, year = os.path.split(os.path.normpath(testset_directory))
    domain = os.path.basename(parent_dir)

    golden_dict = {}
    for name in filenames:
        golden_dict[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(testset_directory, name + goldfile_ext),
            'testpath': os.path.join(testset_directory, name + testfile_ext),
        }

    return golden_dict
[About 29 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0])
>>> x
11: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 199 more lines. Double-click to unfold]
def prepare_for_measurements(gold_directory,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             pickle_output_name=None):
    '''Map every base file name in gold_directory to its gold/test paths.

    gold_directory     -- directory holding the gold standard and test
                          files; its last path component is reported as
                          'year' and the component above it as 'domain'.
    goldfile_ext       -- extension of the gold standard files.
    testfile_ext       -- extension of the test files.
    pickle_output_name -- accepted but not used by this function.

    Returns a dict keyed by base name (directory entry with a trailing
    goldfile_ext or testfile_ext removed); each value is a dict with the
    keys 'domain', 'year', 'goldpath' and 'testpath'.

    Only this first step (building the name -> paths dictionary) is done
    here; no histograms are computed.
    '''
    import re

    # Strip the extension only at the END of the name and escape the whole
    # extension, so "a.html.txt" becomes "a.html" rather than "a".
    ext_pattern = re.compile(
        "(?:" + re.escape(goldfile_ext) + "|" + re.escape(testfile_ext) + ")$")
    # The directory listed is the gold_directory parameter; the former
    # name `testset_directory` no longer exists in this scope.
    filenames = set(ext_pattern.sub("", name)
                    for name in os.listdir(gold_directory))

    # normpath drops a trailing separator, which would otherwise make the
    # last component (the year) an empty string.
    parent_dir, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.basename(parent_dir)

    to_test_pkg = {}
    for name in filenames:
        to_test_pkg[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'testpath': os.path.join(gold_directory, name + testfile_ext),
        }

    return to_test_pkg
[About 30 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0])
Traceback (most recent call last):
File "<pyshell#33>", line 1, in <module>
x = prepare_for_measurements(testset_folders[0])
File "<pyshell#32>", line 18, in prepare_for_measurements
for name in os.listdir(testset_directory) ])
NameError: global name 'testset_directory' is not defined
[About 5 more lines. Double-click to unfold]
def prepare_for_measurements(gold_directory,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             pickle_output_name=None):
    '''Map every gold-standard file in a directory to the paths needed to score it.

    Only step 1 of the measurement pipeline is implemented here; building the
    universe (W), gold-standard (G) and extractor (C) histograms is left to
    other functions.

    Arguments:
    gold_directory -- directory holding the gold-standard files; its last two
        path components are reported as 'domain' and 'year'.
    goldfile_ext -- extension (with leading dot) of the gold-standard files.
    testfile_ext -- extension (with leading dot) of the files under test,
        which are looked up next to the gold files.
    pickle_output_name -- accepted for interface compatibility; not used.

    Returns a dict keyed by base file name whose values are dicts with the
    keys 'domain', 'year', 'goldpath' and 'testpath'.

    Raises OSError if gold_directory cannot be listed.
    '''
    # A file belongs to the test set only if it carries the gold extension.
    # Matching the suffix (instead of regex-deleting extensions anywhere in
    # the name) keeps stray keys such as 'page_html.html' out of the result
    # when the directory also holds files with other extensions.
    ext_len = len(goldfile_ext)
    filenames = set(entry[:len(entry) - ext_len]
                    for entry in os.listdir(gold_directory)
                    if entry.endswith(goldfile_ext))

    # normpath drops a trailing separator so the split yields <domain>/<year>.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    return {name: {'domain': domain,
                   'year': year,
                   'goldpath': os.path.join(gold_directory, name + goldfile_ext),
                   'testpath': os.path.join(gold_directory, name + testfile_ext)}
            for name in filenames}
[About 30 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0])
Traceback (most recent call last):
File "<pyshell#35>", line 1, in <module>
x = prepare_for_measurements(testset_folders[0])
File "<pyshell#34>", line 18, in prepare_for_measurements
for name in os.listdir(testset_directory) ])
NameError: global name 'testset_directory' is not defined
[About 5 more lines. Double-click to unfold]
def prepare_for_measurements(gold_directory,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             pickle_output_name=None):
    '''Map every gold-standard file in a directory to the paths needed to score it.

    Only step 1 of the measurement pipeline is implemented here; building the
    universe (W), gold-standard (G) and extractor (C) histograms is left to
    other functions.

    Arguments:
    gold_directory -- directory holding the gold-standard files; its last two
        path components are reported as 'domain' and 'year'.
    goldfile_ext -- extension (with leading dot) of the gold-standard files.
    testfile_ext -- extension (with leading dot) of the files under test,
        which are looked up next to the gold files.
    pickle_output_name -- accepted for interface compatibility; not used.

    Returns a dict keyed by base file name whose values are dicts with the
    keys 'domain', 'year', 'goldpath' and 'testpath'.

    Raises OSError if gold_directory cannot be listed.
    '''
    # A file belongs to the test set only if it carries the gold extension.
    # Matching the suffix (instead of regex-deleting extensions anywhere in
    # the name) keeps stray keys such as 'page_html.html' out of the result
    # when the directory also holds files with other extensions.
    ext_len = len(goldfile_ext)
    filenames = set(entry[:len(entry) - ext_len]
                    for entry in os.listdir(gold_directory)
                    if entry.endswith(goldfile_ext))

    # normpath drops a trailing separator so the split yields <domain>/<year>.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    return {name: {'domain': domain,
                   'year': year,
                   'goldpath': os.path.join(gold_directory, name + goldfile_ext),
                   'testpath': os.path.join(gold_directory, name + testfile_ext)}
            for name in filenames}
[About 30 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0])
Traceback (most recent call last):
File "<pyshell#37>", line 1, in <module>
x = prepare_for_measurements(testset_folders[0])
File "<pyshell#36>", line 18, in prepare_for_measurements
for name in os.listdir(testset_directory) ])
NameError: global name 'testset_directory' is not defined
[About 5 more lines. Double-click to unfold]
def prepare_for_measurements(gold_directory,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             pickle_output_name=None):
    '''Map every gold-standard file in a directory to the paths needed to score it.

    Only step 1 of the measurement pipeline is implemented here; building the
    universe (W), gold-standard (G) and extractor (C) histograms is left to
    other functions.

    Arguments:
    gold_directory -- directory holding the gold-standard files; its last two
        path components are reported as 'domain' and 'year'.
    goldfile_ext -- extension (with leading dot) of the gold-standard files.
    testfile_ext -- extension (with leading dot) of the files under test,
        which are looked up next to the gold files.
    pickle_output_name -- accepted for interface compatibility; not used.

    Returns a dict keyed by base file name whose values are dicts with the
    keys 'domain', 'year', 'goldpath' and 'testpath'.

    Raises OSError if gold_directory cannot be listed.
    '''
    # A file belongs to the test set only if it carries the gold extension.
    # Matching the suffix (instead of regex-deleting extensions anywhere in
    # the name) keeps stray keys such as 'page_html.html' out of the result
    # when the directory also holds files with other extensions.
    ext_len = len(goldfile_ext)
    filenames = set(entry[:len(entry) - ext_len]
                    for entry in os.listdir(gold_directory)
                    if entry.endswith(goldfile_ext))

    # normpath drops a trailing separator so the split yields <domain>/<year>.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    return {name: {'domain': domain,
                   'year': year,
                   'goldpath': os.path.join(gold_directory, name + goldfile_ext),
                   'testpath': os.path.join(gold_directory, name + testfile_ext)}
            for name in filenames}
[About 30 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0])
>>> x
12: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 199 more lines. Double-click to unfold]
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             pickle_output_name=None):
    '''Map every gold-standard file in a directory to the paths needed to score it.

    Only step 1 of the measurement pipeline is implemented here; building the
    universe (W), gold-standard (G) and extractor (C) histograms is left to
    other functions.

    Arguments:
    gold_directory -- directory holding the gold-standard files; its last two
        path components are reported as 'domain' and 'year'.
    test_directory -- directory holding the files under test; when omitted
        (or otherwise falsy) they are looked up in gold_directory.
    goldfile_ext -- extension (with leading dot) of the gold-standard files.
    testfile_ext -- extension (with leading dot) of the files under test.
    pickle_output_name -- accepted for interface compatibility; not used.

    Returns a dict keyed by base file name whose values are dicts with the
    keys 'domain', 'year', 'goldpath' and 'testpath'.

    Raises OSError if gold_directory cannot be listed.
    '''
    # A file belongs to the test set only if it carries the gold extension.
    # Matching the suffix (instead of regex-deleting extensions anywhere in
    # the name) keeps stray keys such as 'page_html.html' out of the result
    # when the directory also holds files with other extensions.
    ext_len = len(goldfile_ext)
    filenames = set(entry[:len(entry) - ext_len]
                    for entry in os.listdir(gold_directory)
                    if entry.endswith(goldfile_ext))

    # normpath drops a trailing separator so the split yields <domain>/<year>.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    # The two original branches differed only in where test files live.
    test_root = test_directory if test_directory else gold_directory

    return {name: {'domain': domain,
                   'year': year,
                   'goldpath': os.path.join(gold_directory, name + goldfile_ext),
                   'testpath': os.path.join(test_root, name + testfile_ext)}
            for name in filenames}
[About 40 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0], testfile_ext='.txt')
>>> x['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
13: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'year': '2000'}
[About 3 more lines. Double-click to unfold]
>>> test_linkquota = listsubdir([linkquotapath])
>>> testset_folders[0]
14: 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000'
>>> test_linkquota[0]
15: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com'
>>> test_linkquota = listsubdir(listsubdir([linkquotapath]))
>>> test_linkquota[0]
16: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000'
>>> x = prepare_for_measurements(testset_folders[0], test_linkquota[0], testfile_ext='.txt')
>>> x['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
17: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'year': '2000'}
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every gold-standard file in a directory to the paths needed to score it.

    Only step 1 of the measurement pipeline is implemented here; building the
    universe (W), gold-standard (G) and extractor (C) histograms is left to
    other functions.

    Arguments:
    gold_directory -- directory holding the gold-standard and universe files;
        its last two path components are reported as 'domain' and 'year'.
    test_directory -- directory holding the files under test; when omitted
        (or otherwise falsy) they are looked up in gold_directory.
    goldfile_ext -- extension (with leading dot) of the gold-standard files.
    testfile_ext -- extension (with leading dot) of the files under test.
    universe_ext -- extension (with leading dot) of the universe files.
    pickle_output_name -- accepted for interface compatibility; not used.

    Returns a dict keyed by base file name whose values are dicts with the
    keys 'domain', 'year', 'goldpath', 'universepath' and 'testpath'.

    Raises OSError if gold_directory cannot be listed.
    '''
    # A file belongs to the test set only if it carries the gold extension.
    # Matching the suffix (instead of regex-deleting extensions anywhere in
    # the name) keeps stray keys such as 'page_html.html' out of the result
    # when the universe files share the directory with the gold files.
    ext_len = len(goldfile_ext)
    filenames = set(entry[:len(entry) - ext_len]
                    for entry in os.listdir(gold_directory)
                    if entry.endswith(goldfile_ext))

    # normpath drops a trailing separator so the split yields <domain>/<year>.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    # One code path for both cases, so 'universepath' is always present.
    test_root = test_directory if test_directory else gold_directory

    return {name: {'domain': domain,
                   'year': year,
                   'goldpath': os.path.join(gold_directory, name + goldfile_ext),
                   'universepath': os.path.join(gold_directory, name + universe_ext),
                   'testpath': os.path.join(test_root, name + testfile_ext)}
            for name in filenames}
>>> x = prepare_for_measurements(testset_folders[0], test_linkquota[0], testfile_ext='.txt')
>>> x['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
18: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every gold-standard file in a directory to the paths needed to score it.

    Only step 1 of the measurement pipeline is implemented here; building the
    universe (W), gold-standard (G) and extractor (C) histograms is left to
    other functions.

    Arguments:
    gold_directory -- directory holding the gold-standard and universe files;
        its last two path components are reported as 'domain' and 'year'.
    test_directory -- directory holding the files under test; when omitted
        (or otherwise falsy) they are looked up in gold_directory.
    goldfile_ext -- extension (with leading dot) of the gold-standard files.
    testfile_ext -- extension (with leading dot) of the files under test.
    universe_ext -- extension (with leading dot) of the universe files.
    pickle_output_name -- accepted for interface compatibility; not used.

    Returns a dict keyed by base file name whose values are dicts with the
    keys 'domain', 'year', 'goldpath', 'universepath' and 'testpath'.

    Raises OSError if gold_directory cannot be listed.
    '''
    # A file belongs to the test set only if it carries the gold extension.
    # Matching the suffix (instead of regex-deleting extensions anywhere in
    # the name) keeps stray keys such as 'page_html.html' out of the result
    # when the universe files share the directory with the gold files.
    ext_len = len(goldfile_ext)
    filenames = set(entry[:len(entry) - ext_len]
                    for entry in os.listdir(gold_directory)
                    if entry.endswith(goldfile_ext))

    # normpath drops a trailing separator so the split yields <domain>/<year>.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    # Fall back to the gold directory; joining onto a missing test_directory
    # (None) would raise instead of producing a path.
    test_root = test_directory if test_directory else gold_directory

    return {name: {'domain': domain,
                   'year': year,
                   'goldpath': os.path.join(gold_directory, name + goldfile_ext),
                   'universepath': os.path.join(gold_directory, name + universe_ext),
                   'testpath': os.path.join(test_root, name + testfile_ext)}
            for name in filenames}
>>> x = prepare_for_measurements(testset_folders[0], test_linkquota[0], testfile_ext='.txt')
>>> x['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
19: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
[About 6 more lines. Double-click to unfold]
def calc_praf(goldstd, predicted, universe):
    '''Compute precision, recall, accuracy and F1 from three token histograms.

    Arguments:
    goldstd -- histogram of the gold-standard tokens.
    predicted -- histogram of the tokens produced by the extractor.
    universe -- histogram of every token that could have been produced.

    All three must support the multiset operators `&` and `-` and expose
    `.values()` (collections.Counter does).

    Returns a dict with the keys 'p', 'r', 'a' and 'f1'. A ratio whose
    denominator is empty is reported as 0.0 instead of raising
    ZeroDivisionError.
    '''
    def _total(hist):
        # Sum of all counts in a histogram.
        return sum(hist.values())

    def _ratio(numerator, denominator):
        # Float division that maps an empty denominator to 0.0.
        if not denominator:
            return 0.0
        return (numerator * 1.0) / denominator

    not_predicted = universe - predicted

    tp = _total(predicted & goldstd)
    fp = _total(predicted - goldstd)
    # NOTE(review): with repeated tokens this can exceed goldstd - predicted,
    # so tp + fn may be larger than the gold total -- confirm this is the
    # intended definition of a false negative.
    fn = _total(not_predicted & goldstd)
    tn = _total(not_predicted & (universe - goldstd))

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    accuracy = _ratio(tp + tn, tp + fp + fn + tn)
    f1 = 2 * _ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
[About 21 more lines. Double-click to unfold]
>>>
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" token histogram from the text nodes of an HTML file.

    The file is parsed with lxml, all text nodes are concatenated, and the
    result is split on runs of non-word characters.

    Arguments:
    data_filepath -- path of the HTML file to read.

    Returns a collections.Counter mapping token -> occurrence count. Because
    re.split is used, a leading or trailing separator contributes an empty
    string token.

    Any error raised while opening or parsing is re-raised after the path has
    been printed.
    '''
    # Binary mode works with the builtin open() on Python 2 and 3 alike and
    # lets lxml decode the bytes with the encoding given to the parser.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(data_file,
                                              lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # Name the offending file, then fail here rather than continuing
            # with an unbound parse result.
            print(data_filepath)
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))

    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))
    return tokenized_content
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Build the token histogram of a gold-standard text file.

    The file is decoded as latin1 and split on runs of non-word characters.

    Arguments:
    test_filepath -- path of the text file to read.

    Returns a collections.Counter mapping token -> occurrence count. Because
    re.split is used, a leading or trailing separator contributes an empty
    string token.
    '''
    # io.open accepts `encoding` on Python 2 as well as Python 3, unlike the
    # Python 2 builtin open().
    import io

    with io.open(os.path.abspath(test_filepath), 'r', encoding="latin1") as f:
        # Already text; wrapping it in str() would fail on Python 2 for
        # non-ASCII characters.
        content = f.read()

    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))
    return tokenized_content
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Run an extractor on a file and histogram the tokens of its output.

    Arguments:
    extract -- callable taking the file path and returning the extracted text.
    data_filepath -- path handed to `extract`.

    Returns a collections.Counter mapping token -> occurrence count, where
    tokens come from splitting the text on runs of non-word characters.

    If `extract` raises, the path is printed and the exception re-raised.
    '''
    try:
        extracted_text = extract(data_filepath)
    except Exception:
        print(data_filepath)
        raise
    else:
        histogram = collections.Counter()
        for token in re.split(r'\W+', extracted_text):
            histogram[token] += 1
        return histogram
[About 46 more lines. Double-click to unfold]
>>> import re
... import collections
... import os
... try:
... from StringIO import StringIO
... except ImportError:
... from io import StringIO
...
... import lxml.html
...
# Python 2: alias range to the lazy xrange. On Python 3 xrange does not exist,
# and only that NameError should be ignored.
try:
    range = xrange
except NameError:
    pass
...
def histsum(hist):
    '''Return the total of all counts stored in the histogram `hist`.'''
    total = 0
    for count in hist.values():
        total += count
    return total
[About 15 more lines. Double-click to unfold]
def take_measurements(prep_data):
    '''Score every entry of `prep_data` and store the result on the entry.

    Arguments:
    prep_data -- dict whose values are dicts providing the keys 'testpath',
        'universepath' and 'goldpath'.

    For each entry the test file is read as the extractor output, the
    universe and gold-standard histograms are built from their files, and
    the dict returned by calc_praf is stored under 'measurements'.

    Returns the same `prep_data` object, modified in place. Errors from
    reading or parsing any file propagate; entries processed before the
    failure keep their measurements.
    '''
    def _read_text(path):
        # Close the handle deterministically instead of leaving it to the
        # garbage collector.
        with open(path) as handle:
            return str(handle.read())

    for val in prep_data.values():
        content = tokens_to_hist_extractor(_read_text, os.path.abspath(val['testpath']))

        universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))

        goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))

        val['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
>>> take_measurements(x)
Traceback (most recent call last):
File "<pyshell#61>", line 1, in <module>
take_measurements(x)
File "<pyshell#60>", line 6, in take_measurements
universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
File "<pyshell#58>", line 5, in tokens_to_hist_from_universe
with open(data_filepath,'r',encoding="utf8") as data_file:
TypeError: 'encoding' is an invalid keyword argument for this function
>>>
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" token histogram from the text nodes of an HTML file.

    The file is parsed with lxml, all text nodes are concatenated, and the
    result is split on runs of non-word characters.

    Arguments:
    data_filepath -- path of the HTML file to read.

    Returns a collections.Counter mapping token -> occurrence count. Because
    re.split is used, a leading or trailing separator contributes an empty
    string token.

    Any error raised while opening or parsing is re-raised after the path has
    been printed.
    '''
    # Binary mode works with the builtin open() on Python 2 and 3 alike and
    # lets lxml decode the bytes with the encoding given to the parser.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(data_file,
                                              lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # Name the offending file, then fail here rather than continuing
            # with an unbound parse result.
            print(data_filepath)
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))

    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))
    return tokenized_content
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Build the token histogram of a gold-standard text file.

    The file is decoded as latin1 and split on runs of non-word characters.

    Arguments:
    test_filepath -- path of the text file to read.

    Returns a collections.Counter mapping token -> occurrence count. Because
    re.split is used, a leading or trailing separator contributes an empty
    string token.
    '''
    # io.open accepts `encoding` on Python 2 as well as Python 3, unlike the
    # Python 2 builtin open().
    import io

    with io.open(os.path.abspath(test_filepath), 'r', encoding="latin1") as f:
        # Already text; wrapping it in str() would fail on Python 2 for
        # non-ASCII characters.
        content = f.read()

    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))
    return tokenized_content
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Run an extractor on a file and histogram the tokens of its output.

    Arguments:
    extract -- callable taking the file path and returning the extracted text.
    data_filepath -- path handed to `extract`.

    Returns a collections.Counter mapping token -> occurrence count, where
    tokens come from splitting the text on runs of non-word characters.

    If `extract` raises, the path is printed and the exception re-raised.
    '''
    try:
        extracted_text = extract(data_filepath)
    except Exception:
        print(data_filepath)
        raise
    else:
        histogram = collections.Counter()
        for token in re.split(r'\W+', extracted_text):
            histogram[token] += 1
        return histogram
[About 46 more lines. Double-click to unfold]
>>> take_measurements(x)
Traceback (most recent call last):
File "<pyshell#63>", line 1, in <module>
take_measurements(x)
File "<pyshell#60>", line 8, in take_measurements
goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
File "<pyshell#62>", line 23, in tokens_to_hist_goldstd
with open(os.path.abspath(test_filepath), 'r',encoding="latin1") as f:
TypeError: 'encoding' is an invalid keyword argument for this function
[About 7 more lines. Double-click to unfold]
>>>
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" token histogram from the text nodes of an HTML file.

    The file is parsed with lxml, all text nodes are concatenated, and the
    result is split on runs of non-word characters.

    Arguments:
    data_filepath -- path of the HTML file to read.

    Returns a collections.Counter mapping token -> occurrence count. Because
    re.split is used, a leading or trailing separator contributes an empty
    string token.

    Any error raised while opening or parsing is re-raised after the path has
    been printed.
    '''
    # Binary mode works with the builtin open() on Python 2 and 3 alike and
    # lets lxml decode the bytes with the encoding given to the parser.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(data_file,
                                              lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # Name the offending file, then fail here rather than continuing
            # with an unbound parse result.
            print(data_filepath)
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))

    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))
    return tokenized_content
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Build the token histogram of a gold-standard text file.

    The file is decoded as latin1 and split on runs of non-word characters.

    Arguments:
    test_filepath -- path of the text file to read.

    Returns a collections.Counter mapping token -> occurrence count. Because
    re.split is used, a leading or trailing separator contributes an empty
    string token.
    '''
    # Decode explicitly: the builtin open() without an encoding depends on
    # the platform default on Python 3, and the Python 2 builtin does not
    # accept `encoding` at all. io.open behaves the same on both.
    import io

    with io.open(os.path.abspath(test_filepath), 'r', encoding="latin1") as f:
        # Already text; wrapping it in str() would fail on Python 2 for
        # non-ASCII characters.
        content = f.read()

    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))
    return tokenized_content
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Run an extractor on a file and histogram the tokens of its output.

    Arguments:
    extract -- callable taking the file path and returning the extracted text.
    data_filepath -- path handed to `extract`.

    Returns a collections.Counter mapping token -> occurrence count, where
    tokens come from splitting the text on runs of non-word characters.

    If `extract` raises, the path is printed and the exception re-raised.
    '''
    try:
        extracted_text = extract(data_filepath)
    except Exception:
        print(data_filepath)
        raise
    else:
        histogram = collections.Counter()
        for token in re.split(r'\W+', extracted_text):
            histogram[token] += 1
        return histogram
[About 46 more lines. Double-click to unfold]
>>> take_measurements(x)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.html.txt
Traceback (most recent call last):
File "<pyshell#65>", line 1, in <module>
take_measurements(x)
File "<pyshell#60>", line 4, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#64>", line 38, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#60>", line 4, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html.txt'
[About 10 more lines. Double-click to unfold]
>>> x['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
20: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
>>> take_measurements(x)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.html.txt
Traceback (most recent call last):
File "<pyshell#67>", line 1, in <module>
take_measurements(x)
File "<pyshell#60>", line 4, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#64>", line 38, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#60>", line 4, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html.txt'
[About 10 more lines. Double-click to unfold]
>>>
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" token histogram from the text nodes of an HTML file.

    The file is parsed with lxml, all text nodes are concatenated, and the
    result is split on runs of non-word characters.

    Arguments:
    data_filepath -- path of the HTML file to read.

    Returns a collections.Counter mapping token -> occurrence count. Because
    re.split is used, a leading or trailing separator contributes an empty
    string token.

    Any error raised while opening or parsing is re-raised after the path has
    been printed.
    '''
    # Binary mode works with the builtin open() on Python 2 and 3 alike and
    # lets lxml decode the bytes with the encoding given to the parser.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(data_file,
                                              lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # Name the offending file, then fail here rather than continuing
            # with an unbound parse result.
            print(data_filepath)
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))

    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))
    return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
... # Build a token histogram (collections.Counter) from the plain-text file at test_filepath.
... with open(os.path.abspath(test_filepath), 'r') as f:
... # The path is made absolute before opening; the whole file is read in one call.
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
... # Tokens are the pieces between runs of non-word characters; re.split may yield '' at the edges, which is counted too.
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):  # extract: callable taking the path and returning the text to tokenize
... print(data_filepath)  # progress trace, printed for every file
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)  # name the offending file a second time, right before the traceback
... raise  # re-raise unchanged so the caller still sees the original error
... # Same tokenisation as the other tokens_to_hist_* helpers defined in this session.
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))  # re.split may yield '' at the edges, which is counted too
...
... return tokenized_content
[About 47 more lines. Double-click to unfold]
>>> def take_measurements(prep_data):
... # For every entry of prep_data, build three token histograms and store calc_praf's result under val['measurements'].
... for key, val in prep_data.items():
... print('take_measurements:',val['testpath'])  # in this Python 2.7 session the parenthesised pair is printed as a tuple
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))  # the lambda never closes the file it opens
... # Histogram of the full HTML page at val['universepath'].
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
... # Histogram of the gold-standard text at val['goldpath'].
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
... # calc_praf is defined elsewhere in the session; earlier output shows a dict with keys 'a', 'f1', 'p', 'r'.
... val['measurements'] = calc_praf(goldstd,content,universe)
... # The same dict object is returned, with each val updated in place.
... return prep_data
>>> take_measurements(x)
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.html.txt
Traceback (most recent call last):
File "<pyshell#70>", line 1, in <module>
take_measurements(x)
File "<pyshell#69>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#68>", line 39, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#69>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html.txt'
>>> testset_folders
21: ['c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\\2015']
[About 39 more lines. Double-click to unfold]
>>> filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext),"",name)
... for name in os.listdir(testset_folders[0]) ])
Traceback (most recent call last):
File "<pyshell#72>", line 2, in <module>
for name in os.listdir(testset_folders[0]) ])
NameError: name 'goldfile_ext' is not defined
>>> filenames = set([re.sub(("\\"+ '.txt.' +"|"+"\\"+ '.html'),"",name)  # '.txt.' has a stray trailing dot, so the pattern needs one more character after '.txt'
... for name in os.listdir(testset_folders[0]) ])  # names ending in '.txt' therefore stay unstripped, as the echoed set shows
>>> filenames[0]
Traceback (most recent call last):
File "<pyshell#74>", line 1, in <module>
filenames[0]
TypeError: 'set' object does not support indexing
>>> filenames
22: set(['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp',
'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'20001018072019_www_entertainment_msn_com_movies_video_video_asp',
'20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp',
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp',
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp',
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'20001110075800_entertainment_citysearch',
'20001110075800_entertainment_citysearch.txt',
'20001110075800_entertainment_citysearch_html',
'20001110075800_entertainment_citysearch_html.txt',
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp',
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp',
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp',
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp',
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp',
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp',
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'20001206091000_entertainment_msn_com_holiday_tv_asp',
'20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'20001206091000_entertainment_msn_com_music_news_wed01_asp',
'20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp',
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt'])
[About 49 more lines. Double-click to unfold]
>>> def prepare_for_measurements(gold_directory,  # directory listed for file names; also the base of goldpath and universepath
... test_directory=None,  # base directory of the 'testpath' entries
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):  # not referenced anywhere in the visible body
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
... # NOTE: 'universe_set' below is not defined (the parameter is universe_ext); the call recorded after this definition fails with NameError.
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_set),"",name)
... for name in os.listdir(gold_directory)])
... # Only step 1 of the docstring happens in this body: a dict of paths per base name is built.
... # 'domain' is the second-to-last component of gold_directory and 'year' is the last one.
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
... # NOTE(review): the else branch repeats the branch above verbatim.
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)  # test_directory is falsy here; presumably fails for the default None - TODO confirm
... } for name in filenames }
...
...
... return to_test_pkg
[About 43 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0], test_linkquota[0], testfile_ext='.txt')
Traceback (most recent call last):
File "<pyshell#77>", line 1, in <module>
x = prepare_for_measurements(testset_folders[0], test_linkquota[0], testfile_ext='.txt')
File "<pyshell#76>", line 19, in prepare_for_measurements
for name in os.listdir(gold_directory)])
NameError: global name 'universe_set' is not defined
>>> def prepare_for_measurements(gold_directory,  # directory listed for file names; also the base of goldpath and universepath
... test_directory=None,  # base directory of the 'testpath' entries
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):  # not referenced anywhere in the visible body
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
... # Base names: each directory entry with every occurrence of the three extensions removed (the pattern is not anchored to the end of the name).
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
... # Only step 1 of the docstring happens in this body: a dict of paths per base name is built.
... # 'domain' is the second-to-last component of gold_directory and 'year' is the last one.
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
... # NOTE(review): the else branch repeats the branch above verbatim.
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)  # test_directory is falsy here; presumably fails for the default None - TODO confirm
... } for name in filenames }
...
...
... return to_test_pkg
[About 43 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0], test_linkquota[0], testfile_ext='.txt')
>>> filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext),"",name)
... for name in os.listdir(testset_folders[0]) ])
Traceback (most recent call last):
File "<pyshell#80>", line 2, in <module>
for name in os.listdir(testset_folders[0]) ])
NameError: name 'goldfile_ext' is not defined
>>> filenames = set([re.sub(("\\"+ '.txt.' +"|"+"\\"+ '.html'),"",name)  # '.txt.' has a stray trailing dot, so the pattern needs one more character after '.txt'
... for name in os.listdir(testset_folders[0]) ])  # names ending in '.txt' therefore stay unstripped, as the echoed set shows
>>> filenames
23: set(['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp',
'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'20001018072019_www_entertainment_msn_com_movies_video_video_asp',
'20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp',
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp',
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp',
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'20001110075800_entertainment_citysearch',
'20001110075800_entertainment_citysearch.txt',
'20001110075800_entertainment_citysearch_html',
'20001110075800_entertainment_citysearch_html.txt',
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp',
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp',
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp',
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp',
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp',
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp',
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'20001206091000_entertainment_msn_com_holiday_tv_asp',
'20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'20001206091000_entertainment_msn_com_music_news_wed01_asp',
'20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp',
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt'])
[About 49 more lines. Double-click to unfold]
>>> os.listdir(testset_folders[0])
24: ['20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'20001110075800_entertainment_citysearch.html',
'20001110075800_entertainment_citysearch.txt',
'20001110075800_entertainment_citysearch_html.html',
'20001110075800_entertainment_citysearch_html.txt',
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt']
[About 49 more lines. Double-click to unfold]
>>> filenames = set([re.sub(("\\"+ '.txt' +"|"+"\\"+ '.html'),"",name)  # pattern is now \.txt|\.html, so both extensions are removed
... for name in os.listdir(testset_folders[0]) ])  # the set collapses each .html/.txt pair to one base name
>>> filenames
25: set(['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp',
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp',
'20001018072019_www_entertainment_msn_com_movies_video_video_asp',
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp',
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp',
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp',
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp',
'20001110075800_entertainment_citysearch',
'20001110075800_entertainment_citysearch_html',
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp',
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp',
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp',
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp',
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp',
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp',
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp',
'20001206091000_entertainment_msn_com_holiday_tv_asp',
'20001206091000_entertainment_msn_com_music_news_wed01_asp',
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp'])
[About 24 more lines. Double-click to unfold]
>>> x = prepare_for_measurements(testset_folders[0], test_linkquota[0], testfile_ext='.txt')
>>> x['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
26: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
[About 6 more lines. Double-click to unfold]
>>> x['20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp']
27: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'}
[About 6 more lines. Double-click to unfold]
>>> len(x.keys())
28: 25
>>> take_measurements(x)
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
29: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'measurements': {'a': 0.8870168483647175,
'f1': 0.4622641509433962,
'p': 0.3202614379084967,
'r': 0.8305084745762712},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'measurements': {'a': 0.8997756170531039,
'f1': 0.8404761904761906,
'p': 0.7741228070175439,
'r': 0.9192708333333334},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'measurements': {'a': 0.9304979253112033,
'f1': 0.9330669330669331,
'p': 0.9192913385826772,
'r': 0.947261663286004},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.8876058506543495,
'f1': 0.8519269776876267,
'p': 0.7909604519774012,
'r': 0.9230769230769231},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'measurements': {'a': 0.8938181818181818,
'f1': 0.8089005235602095,
'p': 0.7322274881516587,
'r': 0.9035087719298246},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'measurements': {'a': 0.8930817610062893,
'f1': 0.8034682080924855,
'p': 0.7202072538860104,
'r': 0.9084967320261438},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'measurements': {'a': 0.8708771929824561,
'f1': 0.8491803278688524,
'p': 0.7719821162444114,
'r': 0.9435336976320583},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'measurements': {'a': 0.8369659982563208,
'f1': 0.6175869120654397,
'p': 0.4886731391585761,
'r': 0.8388888888888889},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8608695652173913,
'f1': 0.7743589743589743,
'p': 0.6817155756207675,
'r': 0.8961424332344213},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'measurements': {'a': 0.8429319371727748,
'f1': 0.6875000000000001,
'p': 0.5739130434782609,
'r': 0.8571428571428571},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'measurements': {'a': 0.8483455882352942,
'f1': 0.6405228758169934,
'p': 0.5176056338028169,
'r': 0.84},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'measurements': {'a': 0.8987012987012987,
'f1': 0.8276877761413844,
'p': 0.7473404255319149,
'r': 0.9273927392739274},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'measurements': {'a': 0.870575221238938,
'f1': 0.7719298245614035,
'p': 0.673469387755102,
'r': 0.9041095890410958},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'measurements': {'a': 0.892325996430696,
'f1': 0.8507831821929102,
'p': 0.8087774294670846,
'r': 0.8973913043478261},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.8891228070175439,
'f1': 0.8704918032786886,
'p': 0.8219814241486069,
'r': 0.9250871080139372},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'measurements': {'a': 0.8882480173035328,
'f1': 0.8651000870322019,
'p': 0.8094462540716613,
'r': 0.9289719626168225},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'measurements': {'a': 0.8646080760095012,
'f1': 0.7381316998468606,
'p': 0.6731843575418994,
'r': 0.8169491525423729},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'measurements': {'a': 0.9012016021361816,
'f1': 0.8435517970401691,
'p': 0.7823529411764706,
'r': 0.9151376146788991},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'measurements': {'a': 0.8988023952095808,
'f1': 0.8668242710795901,
'p': 0.8308157099697885,
'r': 0.9060955518945635},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'measurements': {'a': 0.8430629264594389,
'f1': 0.7389659520807061,
'p': 0.6369565217391304,
'r': 0.8798798798798799},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'measurements': {'a': 0.8364406779661017,
'f1': 0.6666666666666666,
'p': 0.5376044568245125,
'r': 0.8772727272727273},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'measurements': {'a': 0.9415675297410777,
'f1': 0.9534689328503761,
'p': 0.9406267179769104,
'r': 0.9666666666666667},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'measurements': {'a': 0.8780487804878049,
'f1': 0.8494711147274208,
'p': 0.786144578313253,
'r': 0.9238938053097345},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8459657701711492,
'f1': 0.7060653188180405,
'p': 0.58656330749354,
'r': 0.88671875},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 535 more lines. Double-click to unfold]
>>> x['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
30: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
[About 8 more lines. Double-click to unfold]
>>> for i in range(len(testset_folders)):
... print testset_folders[i], test_linkquota[i]
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2000 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2005 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2010 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2015 c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015
[About 81 more lines. Double-click to unfold]
>>> for i in range(len(testset_folders)):
... print testset_folders[i], "\n", test_linkquota[i]
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015
[About 81 more lines. Double-click to unfold]
>>> for i in range(len(testset_folders)):
... print testset_folders[i]
... print test_linkquota[i]
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.bbc.co.uk\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/news.yahoo.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/thenation.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.cnn.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.esquire.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.forbes.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.foxnews.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.latimes.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2000
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2005
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2010
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010
c:/crawlToTheFuture/crawl-to-the-future/dataset/www.nymag.com\2015
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015
[About 81 more lines. Double-click to unfold]
>>> linkquotatest = []
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(testset_folders[i], test_linkquota[i], testfile_ext='.txt')
...
... linkquotatest.append(prep_data)
>>> import pickle
>>> os.listdir('.')
31: ['entertainment.msn.com',
'news.bbc.co.uk',
'news.yahoo.com',
'thenation.com',
'www.cnn.com',
'www.esquire.com',
'www.forbes.com',
'www.foxnews.com',
'www.latimes.com',
'www.nymag.com']
>>> pickle.dump(linkquotatest,open('./results.pkl','wb'))
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
>>> wcbe_subdirs = listsubdir([wcbe_path])
>>> wcbe_subdirs
32: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter']
>>> testset_folders[0]
33: 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000'
>>> len(linkquotatest)
34: 40
>>> linkquotatest[0]
35: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 281 more lines. Double-click to unfold]
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
...
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(testset_folders[i], test_linkquota[i], testfile_ext='.txt')
...
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
>>> len(linkquotatest)
36: 40
>>> linkquotatest[0]
37: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 299 more lines. Double-click to unfold]
>>>
>>> linkquotatest[0]['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
38: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
>>> len(linkquotatest[0].keys())
39: 25
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
...
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(testset_folders[i], test_linkquota[i], testfile_ext='.txt')
... get_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
Traceback (most recent call last):
File "<pyshell#111>", line 14, in <module>
get_measurements(prep_data)
NameError: name 'get_measurements' is not defined
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
...
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(testset_folders[i], test_linkquota[i], testfile_ext='.txt')
... take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
('take_measurements:', 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
Traceback (most recent call last):
File "<pyshell#112>", line 14, in <module>
take_measurements(prep_data)
File "<pyshell#69>", line 7, in take_measurements
universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
File "<pyshell#68>", line 4, in tokens_to_hist_from_universe
with open(data_filepath,'r') as data_file:
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html'
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050512080141_entertainment_msn_com_movies_hotgossip.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - Going Batty_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005beyond the cape and the cowl_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
('take_measurements:', "c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt")
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050809075007_movies_msn_com_movies_filmfashion.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005Movies -- Adult Fairy Tales_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125173813_entertainment_msn_com_movies_dvd_extras.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051217084007_entertainment_msn_com_movies_dvd_gay.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050520085141_entertainment_msn_com_artistofthemonth.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050520085141_entertainment_msn_com_music_hotgossipB.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_tv_hotgossipc.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005creepykids_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
('take_measurements:', "c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt")
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
('take_measurements:', "c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt")
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051212142307_entertainment_msn_com_music_hotgossipc.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - Bridging the Generation Gap_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100821104840Mom & Pop Culture_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100818062626A Conversation with Christian Jacobs_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
Traceback (most recent call last):
File "<pyshell#113>", line 14, in <module>
take_measurements(prep_data)
File "<pyshell#69>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#68>", line 39, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#69>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt'
[About 278 more lines. Double-click to unfold]
>>> linkquotatest[0]
40: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'measurements': {'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'measurements': {'a': 0.9497374343585896,
'f1': 0.9130998702983139,
'p': 0.9263157894736842,
'r': 0.9002557544757033},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'measurements': {'a': 0.9333680374804789,
'f1': 0.9330543933054394,
'p': 0.970620239390642,
'r': 0.8982880161127895},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.9303030303030303,
'f1': 0.8993435448577681,
'p': 0.9383561643835616,
'r': 0.8634453781512605},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'measurements': {'a': 0.9454148471615721,
'f1': 0.8898678414096917,
'p': 0.9181818181818182,
'r': 0.8632478632478633},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'measurements': {'a': 0.9419152276295133,
'f1': 0.87987012987013,
'p': 0.9093959731543624,
'r': 0.8522012578616353},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'measurements': {'a': 0.9037656903765691,
'f1': 0.8804159445407279,
'p': 0.8581081081081081,
'r': 0.9039145907473309},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'measurements': {'a': 0.8803122289679098,
'f1': 0.676056338028169,
'p': 0.6075949367088608,
'r': 0.7619047619047619},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.894695170229612,
'f1': 0.8129395218002814,
'p': 0.774798927613941,
'r': 0.8550295857988166},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'measurements': {'a': 0.8812227074235808,
'f1': 0.7301587301587301,
'p': 0.6789667896678967,
'r': 0.7896995708154506},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'measurements': {'a': 0.8959484346224678,
'f1': 0.7064935064935064,
'p': 0.6507177033492823,
'r': 0.7727272727272727},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'measurements': {'a': 0.9509632224168126,
'f1': 0.9087947882736157,
'p': 0.9,
'r': 0.9177631578947368},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'measurements': {'a': 0.927536231884058,
'f1': 0.8558758314855874,
'p': 0.8577777777777778,
'r': 0.8539823008849557},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'measurements': {'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.9335180055401662,
'f1': 0.916083916083916,
'p': 0.9509981851179673,
'r': 0.8836424957841484},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'measurements': {'a': 0.9379900213827512,
'f1': 0.9186155285313378,
'p': 0.9478764478764479,
'r': 0.8911070780399274},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'measurements': {'a': 0.9224137931034483,
'f1': 0.8266199649737302,
'p': 0.8973384030418251,
'r': 0.7662337662337663},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'measurements': {'a': 0.9472981987991995,
'f1': 0.9090909090909091,
'p': 0.9360189573459715,
'r': 0.883668903803132},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'measurements': {'a': 0.9401555954518253,
'f1': 0.9159663865546219,
'p': 0.9527972027972028,
'r': 0.8818770226537217},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'measurements': {'a': 0.889644746787604,
'f1': 0.7972222222222222,
'p': 0.7572559366754618,
'r': 0.841642228739003},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'measurements': {'a': 0.8910472972972973,
'f1': 0.7425149700598803,
'p': 0.6813186813186813,
'r': 0.8157894736842105},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'measurements': {'a': 0.9470013947001394,
'f1': 0.9573273441886581,
'p': 0.9567901234567902,
'r': 0.9578651685393258},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'measurements': {'a': 0.933852140077821,
'f1': 0.9098939929328622,
'p': 0.9501845018450185,
'r': 0.8728813559322034},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8989405052974735,
'f1': 0.7847222222222223,
'p': 0.7174603174603175,
'r': 0.8659003831417624},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 420 more lines. Double-click to unfold]
>>> linkquotatest[0]['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
41: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
>>> x = pickle.load(open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/documentSlopeCurveFilter/results.pkl','rb'))
>>> x[0]
42: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 305 more lines. Double-click to unfold]
>>> x[0]['20001019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
Traceback (most recent call last):
File "<pyshell#118>", line 1, in <module>
x[0]['20001019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
KeyError: '20001019_www_entertainment_msn_com_Especial_1013_bratpack_asp'
>>> x[0]['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
43: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050512080141_entertainment_msn_com_movies_hotgossip.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - Going Batty_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005beyond the cape and the cowl_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
('take_measurements:', "c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt")
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050809075007_movies_msn_com_movies_filmfashion.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005Movies -- Adult Fairy Tales_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125173813_entertainment_msn_com_movies_dvd_extras.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051217084007_entertainment_msn_com_movies_dvd_gay.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050520085141_entertainment_msn_com_artistofthemonth.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050520085141_entertainment_msn_com_music_hotgossipB.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_tv_hotgossipc.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005creepykids_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
('take_measurements:', "c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt")
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
('take_measurements:', "c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt")
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051212142307_entertainment_msn_com_music_hotgossipc.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\2005MSN - News - Bridging the Generation Gap_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005\\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100821104840Mom & Pop Culture_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100818062626A Conversation with Christian Jacobs_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
('take_measurements:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
Traceback (most recent call last):
File "<pyshell#120>", line 14, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#69>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#68>", line 39, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#69>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt'
[About 278 more lines. Double-click to unfold]
>>> x = pickle.load(open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/documentSlopeCurveFilter/results.pkl','rb'))
>>> x[0]['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
44: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
>>> x[0]['20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp']
45: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'testpath': 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'}
>>> linkquotatest[0]
46: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'measurements': {'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'measurements': {'a': 0.9497374343585896,
'f1': 0.9130998702983139,
'p': 0.9263157894736842,
'r': 0.9002557544757033},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'measurements': {'a': 0.9333680374804789,
'f1': 0.9330543933054394,
'p': 0.970620239390642,
'r': 0.8982880161127895},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.9303030303030303,
'f1': 0.8993435448577681,
'p': 0.9383561643835616,
'r': 0.8634453781512605},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'measurements': {'a': 0.9454148471615721,
'f1': 0.8898678414096917,
'p': 0.9181818181818182,
'r': 0.8632478632478633},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'measurements': {'a': 0.9419152276295133,
'f1': 0.87987012987013,
'p': 0.9093959731543624,
'r': 0.8522012578616353},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'measurements': {'a': 0.9037656903765691,
'f1': 0.8804159445407279,
'p': 0.8581081081081081,
'r': 0.9039145907473309},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'measurements': {'a': 0.8803122289679098,
'f1': 0.676056338028169,
'p': 0.6075949367088608,
'r': 0.7619047619047619},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.894695170229612,
'f1': 0.8129395218002814,
'p': 0.774798927613941,
'r': 0.8550295857988166},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'measurements': {'a': 0.8812227074235808,
'f1': 0.7301587301587301,
'p': 0.6789667896678967,
'r': 0.7896995708154506},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'measurements': {'a': 0.8959484346224678,
'f1': 0.7064935064935064,
'p': 0.6507177033492823,
'r': 0.7727272727272727},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'measurements': {'a': 0.9509632224168126,
'f1': 0.9087947882736157,
'p': 0.9,
'r': 0.9177631578947368},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'measurements': {'a': 0.927536231884058,
'f1': 0.8558758314855874,
'p': 0.8577777777777778,
'r': 0.8539823008849557},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'measurements': {'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.9335180055401662,
'f1': 0.916083916083916,
'p': 0.9509981851179673,
'r': 0.8836424957841484},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'measurements': {'a': 0.9379900213827512,
'f1': 0.9186155285313378,
'p': 0.9478764478764479,
'r': 0.8911070780399274},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'measurements': {'a': 0.9224137931034483,
'f1': 0.8266199649737302,
'p': 0.8973384030418251,
'r': 0.7662337662337663},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'measurements': {'a': 0.9472981987991995,
'f1': 0.9090909090909091,
'p': 0.9360189573459715,
'r': 0.883668903803132},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'measurements': {'a': 0.9401555954518253,
'f1': 0.9159663865546219,
'p': 0.9527972027972028,
'r': 0.8818770226537217},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'measurements': {'a': 0.889644746787604,
'f1': 0.7972222222222222,
'p': 0.7572559366754618,
'r': 0.841642228739003},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'measurements': {'a': 0.8910472972972973,
'f1': 0.7425149700598803,
'p': 0.6813186813186813,
'r': 0.8157894736842105},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'measurements': {'a': 0.9470013947001394,
'f1': 0.9573273441886581,
'p': 0.9567901234567902,
'r': 0.9578651685393258},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'measurements': {'a': 0.933852140077821,
'f1': 0.9098939929328622,
'p': 0.9501845018450185,
'r': 0.8728813559322034},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8989405052974735,
'f1': 0.7847222222222223,
'p': 0.7174603174603175,
'r': 0.8659003831417624},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 420 more lines. Double-click to unfold]
>>> linkquotatest[0]['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
47: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
>>> def take_measurements(prep_data):
... # Adds a 'measurements' entry to every record in prep_data and returns prep_data; each record is a dict read via its 'testpath', 'universepath' and 'goldpath' keys. Uses iteritems(), i.e. Python 2.
... for key, val in prep_data.iteritems():
... # Test output: the lambda reads the whole file at the absolute 'testpath' into a str (the file object is not explicitly closed); open() raises IOError when that file does not exist.
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
... # Universe: produced by tokens_to_hist_from_universe from the absolute 'universepath'.
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
... # Gold standard: produced by tokens_to_hist_goldstd from the absolute 'goldpath'.
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
... # Store calc_praf's result on the record in place; session output shows keys 'a', 'f1', 'p', 'r' (presumably accuracy, F1, precision, recall - confirm against calc_praf).
... val['measurements'] = calc_praf(goldstd,content,universe)
... # NOTE(review): indentation was lost in this transcript; presumably the return below sits after the loop, not inside it - confirm.
... return prep_data
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
... # Driver: for each entry listsubdir returns for wcbe_path, measure its folders against goldset_folders and pickle the collected results to 'results.pkl' inside that entry. test_dir and listsubdir come from earlier in the session.
... for wcbe_test in wcbe_subdirs:
... # listsubdir is applied twice here, mirroring goldset_folders above; judging by the paths printed in this session the result is one folder per <domain>\<year> - confirm against listsubdir.
... testset_folders = listsubdir(listsubdir([wcbe_test]))
... # Result list for the current wcbe_test, one prep_data mapping per folder. NOTE(review): the name says "linkquota" but the same variable is reused for every wcbe_test.
... linkquotatest = []
... # goldset_folders[i] and testset_folders[i] are paired purely by index - assumes both lists share the same order and goldset_folders is at least as long; TODO confirm.
... for i in range(len(testset_folders)):
... # prepare_for_measurements returns the mapping that take_measurements then fills with a 'measurements' entry per record.
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
... # NOTE(review): indentation was lost in this transcript; presumably the dump below runs once per wcbe_test, after the inner loop - confirm.
... # The file object handed to pickle.dump is opened in 'wb' mode and never explicitly closed.
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
Traceback (most recent call last):
File "<pyshell#127>", line 14, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#126>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#68>", line 39, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#126>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt'
[About 134 more lines. Double-click to unfold]
# Score every extractor output tree under ``wcbe_path`` against the gold
# standard under ``test_dir`` and store one ``results.pkl`` per extractor.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    # NOTE(review): gold and test folders are paired purely by list position,
    # so both listings must come back in the same order -- confirm against
    # listsubdir().
    for i, testset_folder in enumerate(testset_folders):
        prep_data = prepare_for_measurements(goldset_folders[i], testset_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Assumed to run once per extractor directory (the target path depends on
    # wcbe_test); the transcript lost the original indentation.
    # The context manager closes the file so the pickle is fully flushed.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
Traceback (most recent call last):
File "<pyshell#128>", line 14, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#126>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#68>", line 39, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#126>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt'
[About 134 more lines. Double-click to unfold]
>>> testset_folders
48: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.bbc.co.uk\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.bbc.co.uk\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.bbc.co.uk\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.bbc.co.uk\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.yahoo.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.yahoo.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.yahoo.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.yahoo.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\thenation.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\thenation.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\thenation.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\thenation.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.cnn.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.cnn.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.cnn.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.cnn.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.esquire.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.esquire.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.esquire.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.esquire.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.forbes.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.forbes.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.forbes.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.forbes.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.latimes.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.latimes.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.latimes.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.latimes.com\\2015',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.nymag.com\\2000',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.nymag.com\\2005',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.nymag.com\\2010',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.nymag.com\\2015']
[About 54 more lines. Double-click to unfold]
>>> test_dir
49: 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt','r')
Traceback (most recent call last):
File "<pyshell#131>", line 1, in <module>
o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt','r')
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt'
>>>
def tokens_to_hist_from_universe(data_filepath):
    """Return a token histogram of every text node in an HTML file.

    The document is parsed with ``lxml.html`` using a UTF-8 parser, all
    ``//text()`` nodes are concatenated, and the text is split on runs of
    non-word characters.

    Args:
        data_filepath: Path of the HTML document to tokenize.

    Returns:
        ``collections.Counter`` mapping each token to its occurrence count.
        Because ``re.split`` is used, text that starts or ends with a
        separator contributes an empty-string token; this is kept on purpose
        so the histogram stays comparable with the other ``tokens_to_hist_*``
        helpers.

    Raises:
        Any exception raised by ``open`` or ``lxml.html.parse``; parse
        failures are re-raised after the path and file content are printed.
    """
    # Binary mode: the parser is given the encoding explicitly, so the raw
    # bytes should reach it without text-mode newline translation.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            print(data_filepath)
            # The parser may already have consumed the stream; rewind so the
            # dump shows the whole document instead of the leftover tail.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))

    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    """Build a token histogram from a plain-text file.

    Args:
        test_filepath: Path of the text file; it is made absolute before
            being opened.

    Returns:
        ``collections.Counter`` of the pieces produced by splitting the file
        content on runs of non-word characters (``\\W+``).
    """
    absolute_path = os.path.abspath(test_filepath)

    with open(absolute_path, 'r') as handle:
        text = str(handle.read())

    # Counter(iterable) counts exactly like Counter().update(iterable).
    return collections.Counter(re.split(r'\W+', text))
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    """Run an extractor on a file and histogram the tokens it returns.

    Args:
        extract: Callable that takes ``data_filepath`` and returns the
            extracted text as a string.
        data_filepath: Path handed to ``extract``.

    Returns:
        ``collections.Counter`` of the pieces produced by splitting the
        extracted text on runs of non-word characters (``\\W+``).

    Raises:
        Any ``Exception`` raised by ``extract``; the offending path is
        printed before the exception is re-raised.
    """
    try:
        extracted_text = extract(data_filepath)
    except Exception:
        # Show which input failed, then let the caller see the real error.
        print(data_filepath)
        raise

    return collections.Counter(re.split(r'\W+', extracted_text))
# Score every extractor output tree under ``wcbe_path`` against the gold
# standard under ``test_dir`` and store one ``results.pkl`` per extractor.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    # NOTE(review): gold and test folders are paired purely by list position,
    # so both listings must come back in the same order -- confirm against
    # listsubdir().
    for i, testset_folder in enumerate(testset_folders):
        # Progress marker so a failing file can be tied to its folder.
        print("testset folder:", testset_folder)
        prep_data = prepare_for_measurements(goldset_folders[i], testset_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Assumed to run once per extractor directory (the target path depends on
    # wcbe_test); the transcript lost the original indentation.
    # The context manager closes the file so the pickle is fully flushed.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000')
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005')
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
Traceback (most recent call last):
File "<pyshell#133>", line 14, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#126>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#132>", line 40, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#126>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt'
# Score every extractor output tree under ``wcbe_path`` against the gold
# standard under ``test_dir`` and store one ``results.pkl`` per extractor.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    # NOTE(review): gold and test folders are paired purely by list position,
    # so both listings must come back in the same order -- confirm against
    # listsubdir().
    for i, testset_folder in enumerate(testset_folders):
        # Progress marker so a failing file can be tied to its folder.
        print("testset folder:", testset_folder)
        prep_data = prepare_for_measurements(goldset_folders[i], testset_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Assumed to run once per extractor directory (the target path depends on
    # wcbe_test); the transcript lost the original indentation.
    # The context manager closes the file so the pickle is fully flushed.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000')
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005')
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt
Traceback (most recent call last):
File "<pyshell#134>", line 14, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#126>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#132>", line 40, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#126>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt'
# Score every extractor output tree under ``wcbe_path`` against the gold
# standard under ``test_dir`` and store one ``results.pkl`` per extractor.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    # NOTE(review): gold and test folders are paired purely by list position,
    # so both listings must come back in the same order -- confirm against
    # listsubdir().
    for i, testset_folder in enumerate(testset_folders):
        # Progress marker so a failing file can be tied to its folder.
        print("testset folder:", testset_folder)
        prep_data = prepare_for_measurements(goldset_folders[i], testset_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Assumed to run once per extractor directory (the target path depends on
    # wcbe_test); the transcript lost the original indentation.
    # The context manager closes the file so the pickle is fully flushed.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000')
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2005')
('testset folder:', 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010')
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt
Traceback (most recent call last):
File "<pyshell#135>", line 14, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#126>", line 5, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#132>", line 40, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#126>", line 5, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt'
>>> len(linkquotatest)
50: 2
>>> linkquotatest[0]['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
51: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt','r')
Traceback (most recent call last):
File "<pyshell#138>", line 1, in <module>
o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt','r')
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013','r')
Traceback (most recent call last):
File "<pyshell#139>", line 1, in <module>
o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013','r')
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt','r')
>>> test_dir
52: 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
>>>
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
>>> test_dir
53: 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
>>>
==================== New Session ====================
>>>
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> import pickel
... import re
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
Traceback (most recent call last):
File "<pyshell#1>", line 1, in <module>
import pickel
ImportError: No module named pickel
>>> import pickle
... import re
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
Traceback (most recent call last):
File "<pyshell#3>", line 4, in <module>
goldset_folders = listsubdir(listsubdir([test_dir]))
File "<pyshell#2>", line 4, in listsubdir
return [os.path.join(d, f) for d in directory for f in os.listdir(d)
NameError: global name 'os' is not defined
>>> import pickle
... import re
... import os
...
...
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
Traceback (most recent call last):
File "<pyshell#5>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#4>", line 65, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#4>", line 55, in tokens_to_hist_extractor
tokenized_content = collections.Counter()
NameError: global name 'collections' is not defined
>>> import pickle
... import re
... import os
... import collections
...
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
<HTML>
<HEAD>
<script type="text/javascript" src="/static/js/analytics.js"></script>
<script type="text/javascript">archive_analytics.values.server_name="wwwb-app19.us.archive.org";archive_analytics.values.server_ms=184;</script>
<link type="text/css" rel="stylesheet" href="/static/css/banner-styles.css"/>
<META HTTP-EQUIV="Description" NAME="Description" CONTENT="MSN Entertainment Channel brings you the latest news on Books, Music, Movies, and TV Shows. The best movie reviews, great CD and book reviews, & entertainment chat rooms, get the latest word on movie stars, musical artists, & other celebrities.">
<META HTTP-EQUIV="Keywords" NAME="Keywords" CONTENT="chat, music, chat rooms, movies, news, chatrooms, chat room, books, food, celebrities, chatroom, video, videos, movie, movie reviews, movie times, hollywood, free chat, free chat rooms, microsoft chat, cameras, movie stars, tv, book reviews, tv listings, msn chat, book, tv shows, gossip, books on tape, entertainment news">
<META name = "description" Content= "TV - What's On Right Now, Top Story, Gossip & News, Week's Top 10 Television Shows (Nielsen Ratings), Today's TV Talk (Chat Rooms), Cool Clicks - links to web's coolest TV sites & best selling videos.">
<Meta name="Keywords" content="<i>The 72nd Academy Awards</i> ,<i>Countdown to Oscar 2000</i>,<i>Who Wants to be a Millionaire</i>,<i>E.R.</i>,<i>Who Wants to be a Millionaire</i>,<I>Who Wants to be a Millionaire</I>,<i>Friends</i>,<i>Frasier</i>,<i>Daddio</i> ,<i>Law & Order</i>,, Television, tv, tv listings, tv shows, stars, movie stars, moviestars, talk shows, television shows, drama, comedy, video, videos">
<META http-equiv="PICS-Label" content='(PICS-1.0 "http://www.rsac.org/ratingsv01.html" l comment "RSACi North America Server" by "Microsoft Network" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0))'>
<TITLE>MSN Entertainment TV Channel</TITLE>
</HEAD>
<!--StyleSheet//-->
<STYLE>
<!--
A {color:#000000;}
A:hover {color:#FF3300;}
A:active {color:color:#000000;}
A:visited {color:color:#000000;}
A:visited:hover {color:#FF3300;}
.rL{color:black;text-decoration:none;font-family:Arial;font-size:10pt;}
.tL {font-face:verdana,sans-serif;color:#000000;}
.tL:hover{color:#FF3300;}
.tL:active{color:#000000;}
.tL:visited{color:#000000;}
.tL:visited:hover{color:#FF3300;}
.ttL {font-face:arial,sans-serif;color:#000000;text-decoration:none;}
.ttL:hover{color:#FF3300;text-decoration:none;}
.ttL:active{color:#000000;}
.ttL:visited{color:#000000;}
.ttL:visited:hover{color:#FF3300;text-decoration:none;}
.tsL {font-face:arial,sans-serif;color:#CC9900;text-decoration:none;}
.tsL:hover{color:#FF3300;text-decoration:underline;}
.tsL:active{color:#CC9900;}
.tsL:visited{color:#CC9900;}
.tsL:visited:hover{color:#FF3300;text-decoration:underline;}
.t{font:bold small arial,sans-serif;color:#FFFFFF;text-decoration:none;font-size: 10pt;}
.tS{font:bold small arial,sans-serif;color:#FFFFFF;text-decoration:none;font-size: 10pt;}
.fL{font-face:arial,sans-serif;font-size: 8pt;color:black;}
.fL:hover{color:#FF3300;}
.fL:active{color:#000000;}
.fL:visited{color:#000000;}
.fL:visited:hover{color:#FF3300;}
//-->
</STYLE>
<!--Site Parameters//-->
<BODY BGCOLOR="#FFFFFF" TOPMARGIN=0 LEFTMARGIN=0 TEXT="#000000" LINK="#000000" VLINK="#000000" ALINK="#000000">
<!-- BEGIN WAYBACK TOOLBAR INSERT -->
<script type="text/javascript" src="/static/js/disclaim-element.js" ></script>
<script type="text/javascript" src="/static/js/graph-calc.js" ></script>
<script type="text/javascript" src="/static/jflot/jquery.min.js" ></script>
<script type="text/javascript">//<![CDATA[
var __wm = (function(){
var wbPrefix = "/web/";
var wbCurrentUrl = "http://entertainment.msn.com/tv/gist/fri02.asp";
var firstYear = 1996;
var imgWidth = 500,imgHeight = 27;
var yearImgWidth = 25,monthImgWidth = 2;
var displayDay = "9";
var displayMonth = "Nov";
var displayYear = "2000";
var prettyMonths = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"];
var $D=document,$=function(n){return document.getElementById(n)};
var trackerVal,curYear = -1,curMonth = -1;
var yearTracker,monthTracker;
function showTrackers(val) {
if (val===trackerVal) return;
var $ipp=$("wm-ipp");
var $y=$("displayYearEl"),$m=$("displayMonthEl"),$d=$("displayDayEl");
if (val) {
$ipp.className="hi";
} else {
$ipp.className="";
$y.innerHTML=displayYear;$m.innerHTML=displayMonth;$d.innerHTML=displayDay;
}
yearTracker.style.display=val?"inline":"none";
monthTracker.style.display=val?"inline":"none";
trackerVal = val;
}
function getElementX2(obj) {
var $e=jQuery(obj);
return (typeof $e=="undefined"||typeof $e.offset=="undefined")?
getElementX(obj):Math.round($e.offset().left);
}
function trackMouseMove(event,element) {
var eventX = getEventX(event);
var elementX = getElementX2(element);
var xOff = Math.min(Math.max(0, eventX - elementX),imgWidth);
var monthOff = xOff % yearImgWidth;
var year = Math.floor(xOff / yearImgWidth);
var monthOfYear = Math.min(11,Math.floor(monthOff / monthImgWidth));
// 1 extra border pixel at the left edge of the year:
var month = (year * 12) + monthOfYear;
var day = monthOff % 2==1?15:1;
var dateString = zeroPad(year + firstYear) + zeroPad(monthOfYear+1,2) +
zeroPad(day,2) + "000000";
$("displayYearEl").innerHTML=year+firstYear;
$("displayMonthEl").innerHTML=prettyMonths[monthOfYear];
// looks too jarring when it changes..
//$("displayDayEl").innerHTML=zeroPad(day,2);
var url = wbPrefix + dateString + '/' + wbCurrentUrl;
$("wm-graph-anchor").href=url;
if(curYear != year) {
var yrOff = year * yearImgWidth;
yearTracker.style.left = yrOff + "px";
curYear = year;
}
if(curMonth != month) {
var mtOff = year + (month * monthImgWidth) + 1;
monthTracker.style.left = mtOff + "px";
curMonth = month;
}
}
function hideToolbar() {
$("wm-ipp").style.display="none";
}
function bootstrap() {
var $spk=$("wm-ipp-sparkline");
yearTracker=$D.createElement('div');
yearTracker.className='yt';
with(yearTracker.style){
display='none';width=yearImgWidth+"px";height=imgHeight+"px";
}
monthTracker=$D.createElement('div');
monthTracker.className='mt';
with(monthTracker.style){
display='none';width=monthImgWidth+"px";height=imgHeight+"px";
}
$spk.appendChild(yearTracker);
$spk.appendChild(monthTracker);
var $ipp=$("wm-ipp");
$ipp&&disclaimElement($ipp);
}
return{st:showTrackers,mv:trackMouseMove,h:hideToolbar,bt:bootstrap};
})();//]]>
</script>
<style type="text/css">
body {
margin-top:0 !important;
padding-top:0 !important;
min-width:800px !important;
}
</style>
<div id="wm-ipp" lang="en" style="display:none;">
<div style="position:fixed;left:0;top:0;width:100%!important">
<div id="wm-ipp-inside">
<table style="width:100%;"><tbody><tr>
<td id="wm-logo">
<a href="/web/" title="Wayback Machine home page"><img src="/static/images/toolbar/wayback-toolbar-logo.png" alt="Wayback Machine" width="110" height="39" border="0" /></a>
</td>
<td class="c">
<table style="margin:0 auto;"><tbody><tr>
<td class="u" colspan="2">
<form target="_top" method="get" action="/web/form-submit.jsp" name="wmtb" id="wmtb"><input type="text" name="url" id="wmtbURL" value="http://entertainment.msn.com/tv/gist/fri02.asp" style="width:400px;" onfocus="this.focus();this.select();" /><input type="hidden" name="type" value="replay" /><input type="hidden" name="date" value="20001109011800" /><input type="submit" value="Go" /><span id="wm_tb_options" style="display:block;"></span></form>
</td>
<td class="n" rowspan="2">
<table><tbody>
<!-- NEXT/PREV MONTH NAV AND MONTH INDICATOR -->
<tr class="m">
<td class="b" nowrap="nowrap">
<a href="/web/20000818044432/http://entertainment.msn.com/tv/gist/fri02.asp" title="18 Aug 2000">AUG</a>
</td>
<td class="c" id="displayMonthEl" title="You are here: 1:18:00 Nov 9, 2000">NOV</td>
<td class="f" nowrap="nowrap">
<a href="/web/20010212142755/http://entertainment.msn.com/tv/gist/fri02.asp" title="12 Feb 2001"><strong>FEB</strong></a>
</td>
</tr>
<!-- NEXT/PREV CAPTURE NAV AND DAY OF MONTH INDICATOR -->
<tr class="d">
<td class="b" nowrap="nowrap">
<a href="/web/20001017145824/http://entertainment.msn.com/tv/gist/fri02.asp" title="14:58:24 Oct 17, 2000"><img src="/static/images/toolbar/wm_tb_prv_on.png" alt="Previous capture" width="14" height="16" border="0" /></a>
</td>
<td class="c" id="displayDayEl" style="width:34px;font-size:24px;" title="You are here: 1:18:00 Nov 9, 2000">9</td>
<td class="f" nowrap="nowrap">
<a href="/web/20001204130300/http://www.entertainment.msn.com/tv/gist/fri02.asp" title="13:03:00 Dec 4, 2000"><img src="/static/images/toolbar/wm_tb_nxt_on.png" alt="Next capture" width="14" height="16" border="0" /></a>
</td>
</tr>
<!-- NEXT/PREV YEAR NAV AND YEAR INDICATOR -->
<tr class="y">
<td class="b" nowrap="nowrap">
1999
</td>
<td class="c" id="displayYearEl" title="You are here: 1:18:00 Nov 9, 2000">2000</td>
<td class="f" nowrap="nowrap">
<a href="/web/20030301014912/http://entertainment.msn.com/tv/gist/fri02.asp" title="1 Mar 2003"><strong>2003</strong></a>
</td>
</tr>
</tbody></table>
</td>
</tr>
<tr>
<td class="s">
<a class="t" href="/web/20001109011800*/http://entertainment.msn.com/tv/gist/fri02.asp" title="See a list of every capture for this URL">20 captures</a>
<div class="r" title="Timespan for captures of this URL">10 May 00 - 5 Dec 04</div>
</td>
<td class="k">
<a href="" id="wm-graph-anchor">
<div id="wm-ipp-sparkline" title="Explore captures for this URL">
<img id="sparklineImgId" alt="sparklines"
onmouseover="__wm.st(1)" onmouseout="__wm.st(0)"
onmousemove="__wm.mv(event,this)"
width="500"
height="27"
border="0"
src="/web/jsp/graph.jsp?graphdata=500_27_1996:-1:000000000000_1997:-1:000000000000_1998:-1:000000000000_1999:-1:000000000000_2000:10:000032010312_2001:-1:010000000000_2002:-1:000000000000_2003:-1:001001001010_2004:-1:000010010001_2005:-1:000000000000_2006:-1:000000000000_2007:-1:000000000000_2008:-1:000000000000_2009:-1:000000000000_2010:-1:000000000000_2011:-1:000000000000_2012:-1:000000000000_2013:-1:000000000000_2014:-1:000000000000_2015:-1:000000000000" />
</div>
</a>
</td>
</tr></tbody></table>
</td>
<td class="r">
<a href="#close" onclick="__wm.h();return false;" style="background-image:url(/static/images/toolbar/wm_tb_close.png);top:5px;" title="Close the toolbar">Close</a>
<a href="http://faq.web.archive.org/" style="background-image:url(/static/images/toolbar/wm_tb_help.png);bottom:5px;" title="Get some help using the Wayback Machine">Help</a>
</td>
</tr></tbody></table>
</div>
</div>
</div>
<script type="text/javascript">__wm.bt();</script>
<!-- END WAYBACK TOOLBAR INSERT -->
<BASEFONT FACE="ARIAL,SANS-SERIF" SIZE=2>
<!-- Custom Codes for the site //-->
<!--Networktable_Top//-->
<!--Network Container Table //-->
<TABLE CELLPADDING=0 CELLSPACING=0 BORDER=0 BGCOLOR="#FFFFFF">
<TR>
<TD VALIGN="TOP">
<!--Network Header Blips//-->
<!--Network Header Blips//-->
<table cellpadding=0 cellspacing=0 border=0>
<tr>
<td height=2><IMG BORDER="0" SRC="/web/20001109011800im_/http://entertainment.msn.com/images/spacer01.gif" WIDTH="1" HEIGHT="1" ALT=""></td>
</tr>
<tr>
<td>
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td width=1 bgcolor=#FFFFFF></td>
<td width=92 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/7/" class=tL><b>MSN Home</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=72 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/8/" class=tL><b>Hotmail</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=100 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/A/" class=tL><b>Web Search</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=81 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/9/" class=tL><b>Shopping</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=62 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/C/" class=tL><b>Money</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=116 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/B/" class=tL><b>People & Chat</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=77 align=right>
<img src=/web/20001109011800im_/http://entertainment.msn.com/images/spacer01.gif WIDTH=67 HEIGHT=19 BORDER=0 HSPACE=10>
</td>
<td width=1 bgcolor=#000000></td>
</tr>
</table>
</td>
</tr>
<tr>
<td height=2><IMG BORDER="0" SRC="/web/20001109011800im_/http://entertainment.msn.com/images/spacer01.gif" WIDTH="1" HEIGHT="1" ALT=""></td>
</tr>
</table>
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td height=60> <A HREF="/web/20001109011800/http://go.msn.com/A5/0/"><IMG SRC="/web/20001109011800im_/http://entertainment.msn.com/images/logo.gif" WIDTH=140 HEIGHT=60 BORDER=0 ALT="go to MSN.com" align=middle></A> </td>
<td height=60 width=468><A HREF="/web/20001109011800/http://ads.msn.com/ads/redirect.dll/CID=0009e0c897844cf000000000/AREA=ENTGEN?image=http://ads.msn.com/ads/ENTGEN/00482CP0096_LG.gif" TARGET="_top"><IMG SRC="/web/20001109011800im_/http://ads.msn.com/ads/ENTGEN/00482CP0096_LG.gif" ALT="Win a Hummer for a Weekend!" BORDER=0 WIDTH=468 HEIGHT=60></A></td>
</tr>
<tr>
<td height=1 colspan=2></td>
</tr>
</table>
<!--PlaceHolder //-->
<!-- Header 4// -->
<table cellpadding=0 cellspacing=0 border=0 width=608 bordercolor=#FFFFFF>
<tr>
<td width=300 height=32 align=left bgcolor=#FFFFFF><font size=5 face=arial,sans-serif color=#000000><b><a href="/web/20001109011800/http://entertainment.msn.com/" class=ttL> Entertainment</a></b></font></td>
<td width=100% align=left valign=middle bgcolor=#FFFFFF><IMG BORDER="0" SRC="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" WIDTH="1" HEIGHT="1" ALT=""></td>
</tr>
</table>
<!--Network ToolBar Functions//-->
<!--Network ToolBar Functions//-->
<script language="Javascript">
<!--
function mOvr(src,clrOver){
if (!src.contains(event.fromElement)){
src.style.cursor = 'hand'; src.bgColor = clrOver;
}
}
function mOut(src,clrIn){
if (!src.contains(event.toElement)){
src.style.cursor = 'default';
src.bgColor = clrIn;
}
}
function mClk(src){
if(event.srcElement.tagName=='TD'){
src.children.tags('A')[0].click();
}
}
//-->
</script>
<!--Network ToolBar//-->
<!--Network ToolBar//-->
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#336699 width=608 height=17>
<tr>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Home </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/books/books.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Books </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/celebs/celebs.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Celebs </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/movies/movies.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Movies </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/music/music.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Music </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="76" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/shopping/default.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Shopping </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/tv/tv.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> TV </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/news/news.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> News </font></a></td>
</tr>
</table>
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#FFFFFFF width=608 height=2>
<tr valign="top" height="2"><td><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" border="0" width="1" height="2"></td></tr></table>
<!--Misc PlaceHolder//-->
<!--PlaceHolder //-->
<!-- Main Content Start -->
<!-- tv section begin -->
<table border="0" cellspacing="0" cellpadding="0" height="777" width="608">
<tr colspan="7" valign="top">
<!-- Left Gutter of the Left Column -->
<td width="4" bgcolor="#CCCCFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Left Column -->
<td width="139" bgcolor="#CCCCFF" height="100%" valign="top" align="left" >
<!-- Left Browse Bar Start -->
<!-- Left Browse Start -->
<table>
<p align="left">
<font size="6" face="Arial" color="#003366">
<strong>tv</strong>
</font><br>
<!-- section links -->
<p align="left">
<font size="4" face="Arial" color="#003366">
<strong>Today on TV</strong></font>
<br>
<font size="2" face="Arial" color="#9B0000">
• <a href="/web/20001109011800/http://go.msn.com/A5/D/5.asp?target=http://msn.gist.com/tv/index.jsp?ref=msn">All shows</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/tv/gist/TalkShow.asp">Talk shows</a><br><br>
</font>
<!--
• <a href="">Movies on TV</a><br>
• <a href="">Sports on TV</a><br>
</font>
</P>
--->
</table>
<!-- Brick Feed Box -->
<!-- begin Pipe email notification-->
<!-- include file ="email.jsp" -->
<!-- end Pipe email notification-->
<table width="139" border="0" cellpadding="0" cellspacing="0">
<tr>
<td width="139" colspan="5">
<a href="/web/20001109011800/http://www.gist.com/tv/index.jsp?ref=msn&origin=msnbrick" target="_top">
<img alt="go to gist.com" border="0" width="139" height="45" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/gistbox_top.gif"></a></td>
</tr>
<tr valign="top" align="left">
<td width="2" bgcolor="#000066">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="3"></td>
<td width="4">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="4"></td>
<td width="127" align="middle" valign="top">
<img src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="127" height="5"><br>
<a href="/web/20001109011800/http://clubs.gist.com/tvclubs/fanclub.jsp?fanclub=buffy&ref=msn&origin=msnbrick"><img src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/tv/images/msn_buffy.jpg" height=103 width=100 border=1 alt="JA"></a>
<font color="#0000ff" face="arial, helvetica" size="2">
<a href="/web/20001109011800/http://clubs.gist.com/tvclubs/fanclub.jsp?fanclub=buffy&ref=msn&origin=msnbrick"><br clear=all>
<b>Talk to Us!</b></a></font><br>
<font face="arial, helvetica" size="1">Be a <i>Buffy the Vampire Slayer</i> Fan of the Month!</font><br>
<font color="#0000ff" face="arial, helvetica" size="2">
<a href="/web/20001109011800/http://www.gist.com/tv/soaps/soaps.jsp?ref=msn&origin=msnbrick"><br clear=all>
<b>Soap Updates</b></a></font><br>
<font face="arial, helvetica" size="1">What Happened on Your Favorite Soap Today?</font><br>
<font color="#0000ff" face="arial, helvetica" size="2">
<a href="/web/20001109011800/http://www.gist.com/tv/hottopic.jsp?name=xfiles21&ref=msn&orgin=msnbrick"><br clear=all>
<b>Hot Topics</b></a></font><br>
<font face="arial, helvetica" size="1">Can Doggett Keep <i>The X-Files</i> Alive?</font><br>
</td>
<td width="4">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="2"></td>
<td width="2" bgcolor="#000066">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="3"></td>
</tr>
<tr>
<td width="139" colspan="5">
<a href="/web/20001109011800/http://www.gist.com/tv/index.jsp?ref=msn&origin=msnbrick" target="_top"><img alt="style" border="0" width="139" height="13" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/gistbox_bot2.gif"></a></td>
</tr>
</table>
<!-- where to buy section begin -->
<!-- "best buys" -->
<p><strong><font color="#003366" face="Arial" size="4">best buys</font></strong><br><font size="2" face="Arial" color="#9B0000">
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=televisions&catname=electronics">TVs</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=sattv&catname=electronics">Satellite TV</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=vcrs&catname=electronics">VCRs</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=dvdplayer&catname=electronics">DVD players</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=hometheater&catname=electronics">Home theater</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=camcorders&catname=electronics">Camcorders</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=videogear&catname=electronics">Video gear</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/category.asp?catname=electronics">The complete electronics guide</a><br>
</font><br></P>
<!-- Contact us -->
<p align="left">
<font face="Arial" size="2" color="#9B0000">
• <a href="/web/20001109011800/http://entertainment.msn.com/help/help.asp"><strong>Contact us</strong></a>
</font>
</p>
<!-- Left Browse Bar End -->
<!-- reserved section begin -->
<!-- Reserved Blip -->
</td>
<!-- Right Gutter of the Left Browse Bar -->
<td width="4" bgcolor="#CCCCFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Left Gutter of the Center Column -->
<td width="7" bgcolor="#FFFFFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Left Browse Bar End -->
<!-- Begining of Center -->
<td width="320" bgcolor="#FFFFFF" height="100%" valign="top" align="left" >
<table border="0" cellspacing="0" cellpadding="0">
<tr>
<td width="100%" valign="top" align="left">
<!--center begin-->
<table border="0" cellspacing="0" cellpadding="0">
<tr>
<td width=100% valign="top" align="left">
<p align="left"><strong><font face="Arial" size="3" color="#5F5F9F">GIST TV news</font>
<font color="#9B0000" face="Arial" size="5"><br>
<i>90210</i>'s Thiessen Snags ABC Pilot
</font></strong><br>
</p>
<div align="left">
<table border="0" cellpadding="0" cellspacing="5" width="125" align="left">
<tr>
<td>
<!-- photo -->
<img src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/tv/general/images/4_tiffanithiessen.gif" width=132 height=115 alt="picture of Tiffani Thiessen"><BR><font face=verdana,helvetica,arial,times color=#37375F size=1>Tiffani Thiessen</font></td>
</tr>
</table></div>
<font face="Arial,helvetica" size="2">
Nov 2, 2000
—
Tiffani Thiessen has a new gig. The former <i>Beverly Hills, 90210</i> regular has signed up to star in an ABC pilot tentatively called <i>Something About Tiffani</i>, says <i>Variety</i>. Thiessen will play a beer-swilling, sharp-tongued woman who leaves her fiancé at the altar, goes to Italy for six months and returns home minus 70 pounds. Her three male college friends/housemates, who'd always thought of her as just one of guys, are blown away by her new sexy appearance.
<p>
"They're expecting the old Tiffani, and this hot babe walks in," said Mark Blutman, who will create, write and executive-produce the pilot with Howard Busgang. "Inside, she's still one of the guys: She's salty, she's ballsy. The dynamics of the house are thrown completely off the chart."
<p>
The show is penciled in on the network's fall 2001 schedule.
<p>
In addition to <i>90210</i>, Thiessen's résumé includes the ABC sitcom <i>Two Guys and a Girl</i>, NBC's Saturday morning teen comedy <i>Saved by the Bell</i> and a number of TV movies. —<i>Jenny Higgons, <nobr>Gist TV Staff</nobr></i><p></font><p>
<font face=verdana,helvetica,arial,times color=#37375F size=1>Photo courtesy of Fox</font>
<tr>
<td>
<br><br>
</td>
</tr>
</td>
</tr>
</table>
</td>
</td>
</tr>
</table>
<!-- Right Gutter of the Center Column -->
<!-- Partner Column -->
<!-- Right Gutter of the Center Column -->
<td width="6" bgcolor="#FFFFFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Headlines -->
<td width="137" bgcolor="#FFFFFF" height="100%" valign="top" align="left" >
<table border="0" cellspacing="0" cellpadding="0">
<!--Insert "top headlines" Here -->
<table cellpadding="1">
<tr valign="top" align="right">
<td width="100%" valign="top" bgcolor="#CCCCFF" border="5">
<p align="left">
<font face="Arial" size="3" color="#003366"><strong><center>top headlines</center></strong></font>
<table width="138">
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/jseinfeld.asp"><font SIZE="2" face="Arial" color="#000080">Seinfeld's baby girl </font></a></td>
</tr>
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/cmanheim.asp"><font SIZE="2" face="Arial" color="#000080">Oh, baby! Camryn Manheim expecting</font></a></td>
</tr>
</table>
<table width="138">
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/tv/gist/wed03.asp"><font SIZE="2" face="Arial" color="#000080"><i>Frasier'</i>s Dr. Laura spoof
M.I.A.</font></a></td>
</tr>
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34" valign="top"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/oj.asp"><font SIZE="2" face="Arial" color="#000080">Judge
boots<br>
O.J. suit</font></a></td>
</tr>
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/tv/gist/wed02.asp"><font SIZE="2" face="Arial" color="#000080"><i>Survivor</i>'s Richard undergoes lipo</font></a></td>
</tr>
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/trailers.asp"><font SIZE="2" face="Arial"><font color="#000080">Theater owners take out trailer trash</font> </font></a></td>
</tr>
</table>
<table width="138">
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/rosie.asp"><font SIZE="2" face="Arial" color="#000080">Rosie's
political ultimatum</font></a></td>
</tr>
</table>
<table width="138">
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/grinch.asp"><font face="Arial" size="2" color="#000080">Mrs.
Seuss: Whoville is not Pooh-ville</font></a></td>
</tr>
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/madonna.asp"><font face="Arial" size="2" color="#000080">Madonna
makes some live <i>Music</i></font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/leo.asp"><font SIZE="2" face="Arial" color="#000080">Leo
running late for next film</font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/music/news/tue01.asp"><font SIZE="2" face="Arial" color="#000080">Pixies
to reunite -- partially?</font></a></td>
</tr>
</table>
<table width="138">
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1106/scooby.asp"><font face="Arial" size="2" color="#000080">Gellar,
Prinze may help <i>Scooby</i></font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/CSspecial/box_office/"><font SIZE="2" face="Arial" color="#000080">Devilish
<i>Angels</i> soars at box office</font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1106/mcrichton.asp"><font SIZE="2" face="Arial" color="#000080">Crichton
gets medieval on PCs</font></a></td>
</tr>
</table>
<table width="138">
<tr valign="top">
<td cellspacing="0" cellpadding="0">
<hr color="#000000"> <center>
<font face="Arial" size="2" color="#000000ss">
This site is best viewed with<br>
MSN Explorer.</font><br>
<a href="/web/20001109011800/http://explorer.msn.com/">
<img src="/web/20001109011800im_/http://entertainment.msn.com/images/animated_highlight_tm_free.gif" border="0"></a></center>
</td>
</tr>
</table>
<!-- vertical space between boxes -->
<tr height="3"><td height="3" width="100%">
</td></tr>
</table>
</td>
</TR>
</TD>
</tr>
</table>
<!-- Reserved Blip -->
<!-- Reserved Blip -->
<!-- Main Content End -->
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#FFFFFFF width=608 height=2>
<tr valign="top" height="2"><td><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" border="0" width="1" height="2"></td></tr></table>
<!--Bottom Navigation bar from the network template//-->
<!--Repeated Toolbar Blip//-->
<!--Network ToolBar//-->
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#336699 width=608 height=17>
<tr>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Home </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/books/books.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Books </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/celebs/celebs.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Celebs </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/movies/movies.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Movies </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/music/music.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Music </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="76" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/shopping/default.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Shopping </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/tv/tv.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> TV </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/news/news.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> News </font></a></td>
</tr>
</table>
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#FFFFFFF width=608 height=2>
<tr valign="top" height="2"><td><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" border="0" width="1" height="2"></td></tr></table>
<!--Network Footers//-->
<!--Footer 1 //-->
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td width=208 valign=top align=left>
<A HREF="/web/20001109011800/http://go.msn.com/A5/1/"><IMG SRC="/web/20001109011800im_/http://entertainment.msn.com/images/logo.gif" WIDTH=140 HEIGHT=60 BORDER=0 ALT="go to MSN.com" align=middle></A> <table cellpadding=0 cellspacing=0 border=0>
<tr>
<td width=20> </td>
<td align=left valign=top>
<font size=2 face=arial,sans-serif class=rL>Return to <b><a href="/web/20001109011800/http://entertainment.msn.com/" class=rL>Entertainment</a></b></font></td>
<td width=20> </td>
</tr>
</table></td>
<td width=190 valign=top align=left>
<font size=2 face=arial,sans-serif><b>Other Links:</b></font>
<hr size=1 color=#000000 noshade>
<font size=1 face=arial,sans-serif>
<a href="/web/20001109011800/http://go.msn.com/A5/2/WOMEN.ASP" class=fL>Women</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/CHAT.ASP" class=fL>Chat</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/FREEGAMES.ASP" class=fL>Free Games</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/DOWNLOADS.ASP" class=fL>Downloads</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/RESEARCH.ASP" class=fL>Research & School</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/MORE.ASP" class=fL>More...</a><br><br>
</font>
</td>
<td width=20></td>
<td width=190 valign=top align=left>
<font size=2 face="arial,sans-serif"><b>Special Features:</b></font>
<hr size=1 color=#000000 noshade>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it1.asp?target=http://moneycentral.msn.com/insure/insur.asp?itype=auto" class="fL">Getting scammed on auto insurance?</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it2.asp?target=http://entertainment.msn.com/special/fall_entertainment_guide/default.asp" class="fL">What's HOT and NOT this Fall</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it3.asp?target=http://cagle.slate.msn.com/" class="fL">Looking for a new perspective?</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it4.asp?target=http://preview.msn.com/" class="fL">How to g
Traceback (most recent call last):
File "<pyshell#7>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#6>", line 67, in take_measurements
universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
File "<pyshell#6>", line 16, in tokens_to_hist_from_universe
parsed_goldhtml = lxml.html.parse(data_file,
NameError: global name 'lxml' is not defined
et more out of the web</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it5.asp?target=http://chat.msn.com/features/categories/music.asp" class="fL">Add music to your favorite chat!</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it10.asp?target=http://update.msn.com/quicklinks/default.asp" class="fL">More...</a><br><br>
</td>
</tr>
</table>
<!--Network Footers Tems and Conditions//-->
<!--Network Footers Tems and Conditions//-->
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td height=25 bgcolor=#000000 align=center class=fTOU >
<font size=1 face=arial,sans-serif color=#FFFFFF> ©2000 Microsoft Corporation. All rights reserved. </font>
<a href="/web/20001109011800/http://go.msn.com/A5/4/"><font size=1 face=arial,sans-serif color=#FFFFFF>Terms of Use</font></a>
<a href="/web/20001109011800/http://go.msn.com/A5/5/"><font size=1 face=arial,sans-serif color=#FFFFFF>Advertise</font></a>
<a href="/web/20001109011800/http://go.msn.com/A5/6/"><font size=1 face=arial,sans-serif color=#FFFFFF>TRUSTe Approved Privacy Statement</font></a>
</td>
</tr>
</table>
<!--Network Template Sidebar script//-->
<!--Network Template Sidebar Cell//-->
</TD>
<TD valign="top">
<iframe id=sidebar style="display: none" src="/web/20001109011800if_/http://arc1.msn.com/ADSAdClient31.dll?GetAd?PG=PROENT?SC=D1" border=0 frameborder=0 scrolling=no marginheight=0 marginwidth=0 width=160 height=600>
<a href="/web/20001109011800/http://arc1.msn.com/Clicker/ADSClicker31.dll?Redirect?PG=PROENT?AN=1.0">
<img src="/web/20001109011800im_/http://arc1.msn.com/ADSAdClient31.dll?GetImage?PG=PROENT?SC=D1?AN=1.0" width=160 height=600 border=0></a>
</iframe>
</TD>
<!--Network Template Sidebar Cell//-->
<!--Network Template Sidebar script//-->
<script language="Javascript">
<!--
function showHideSidebar() {
if(document.body) var iWidth = document.body.clientWidth;
if(document.all) document.all.sidebar.style.display = (iWidth > 753 ? '' :'none');
}
showHideSidebar();
function errortrap(msg,url,line){
return true;
}
onerror=errortrap;
//-->
</script>
<script language="javascript" type="text/javascript" for="window" event="onresize">
<!--
showHideSidebar();
function errortrap(msg,url,line){
return true;
}
onerror=errortrap;
//-->
</script>
<!--Network Template table bottom //-->
<!--Network Template Table bottom //-->
</TD>
</TR>
</TABLE>
</TD>
</TR>
</TABLE>
</BODY>
</HTML>
<!--
FILE ARCHIVED ON 1:18:00 Nov 9, 2000 AND RETRIEVED FROM THE
INTERNET ARCHIVE ON 4:04:57 Jan 8, 2015.
JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE.
ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C.
SECTION 108(a)(3)).
-->
>>> import pickle
... import re
... import os
... import collections
... import lxml
...
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
<HTML>
<HEAD>
<script type="text/javascript" src="/static/js/analytics.js"></script>
<script type="text/javascript">archive_analytics.values.server_name="wwwb-app19.us.archive.org";archive_analytics.values.server_ms=184;</script>
<link type="text/css" rel="stylesheet" href="/static/css/banner-styles.css"/>
<META HTTP-EQUIV="Description" NAME="Description" CONTENT="MSN Entertainment Channel brings you the latest news on Books, Music, Movies, and TV Shows. The best movie reviews, great CD and book reviews, & entertainment chat rooms, get the latest word on movie stars, musical artists, & other celebrities.">
<META HTTP-EQUIV="Keywords" NAME="Keywords" CONTENT="chat, music, chat rooms, movies, news, chatrooms, chat room, books, food, celebrities, chatroom, video, videos, movie, movie reviews, movie times, hollywood, free chat, free chat rooms, microsoft chat, cameras, movie stars, tv, book reviews, tv listings, msn chat, book, tv shows, gossip, books on tape, entertainment news">
<META name = "description" Content= "TV - What's On Right Now, Top Story, Gossip & News, Week's Top 10 Television Shows (Nielsen Ratings), Today's TV Talk (Chat Rooms), Cool Clicks - links to web's coolest TV sites & best selling videos.">
<Meta name="Keywords" content="<i>The 72nd Academy Awards</i> ,<i>Countdown to Oscar 2000</i>,<i>Who Wants to be a Millionaire</i>,<i>E.R.</i>,<i>Who Wants to be a Millionaire</i>,<I>Who Wants to be a Millionaire</I>,<i>Friends</i>,<i>Frasier</i>,<i>Daddio</i> ,<i>Law & Order</i>,, Television, tv, tv listings, tv shows, stars, movie stars, moviestars, talk shows, television shows, drama, comedy, video, videos">
<META http-equiv="PICS-Label" content='(PICS-1.0 "http://www.rsac.org/ratingsv01.html" l comment "RSACi North America Server" by "Microsoft Network" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0))'>
<TITLE>MSN Entertainment TV Channel</TITLE>
</HEAD>
<!--StyleSheet//-->
<STYLE>
<!--
A {color:#000000;}
A:hover {color:#FF3300;}
A:active {color:color:#000000;}
A:visited {color:color:#000000;}
A:visited:hover {color:#FF3300;}
.rL{color:black;text-decoration:none;font-family:Arial;font-size:10pt;}
.tL {font-face:verdana,sans-serif;color:#000000;}
.tL:hover{color:#FF3300;}
.tL:active{color:#000000;}
.tL:visited{color:#000000;}
.tL:visited:hover{color:#FF3300;}
.ttL {font-face:arial,sans-serif;color:#000000;text-decoration:none;}
.ttL:hover{color:#FF3300;text-decoration:none;}
.ttL:active{color:#000000;}
.ttL:visited{color:#000000;}
.ttL:visited:hover{color:#FF3300;text-decoration:none;}
.tsL {font-face:arial,sans-serif;color:#CC9900;text-decoration:none;}
.tsL:hover{color:#FF3300;text-decoration:underline;}
.tsL:active{color:#CC9900;}
.tsL:visited{color:#CC9900;}
.tsL:visited:hover{color:#FF3300;text-decoration:underline;}
.t{font:bold small arial,sans-serif;color:#FFFFFF;text-decoration:none;font-size: 10pt;}
.tS{font:bold small arial,sans-serif;color:#FFFFFF;text-decoration:none;font-size: 10pt;}
.fL{font-face:arial,sans-serif;font-size: 8pt;color:black;}
.fL:hover{color:#FF3300;}
.fL:active{color:#000000;}
.fL:visited{color:#000000;}
.fL:visited:hover{color:#FF3300;}
//-->
</STYLE>
<!--Site Parameters//-->
<BODY BGCOLOR="#FFFFFF" TOPMARGIN=0 LEFTMARGIN=0 TEXT="#000000" LINK="#000000" VLINK="#000000" ALINK="#000000">
<!-- BEGIN WAYBACK TOOLBAR INSERT -->
<script type="text/javascript" src="/static/js/disclaim-element.js" ></script>
<script type="text/javascript" src="/static/js/graph-calc.js" ></script>
<script type="text/javascript" src="/static/jflot/jquery.min.js" ></script>
<script type="text/javascript">//<![CDATA[
var __wm = (function(){
var wbPrefix = "/web/";
var wbCurrentUrl = "http://entertainment.msn.com/tv/gist/fri02.asp";
var firstYear = 1996;
var imgWidth = 500,imgHeight = 27;
var yearImgWidth = 25,monthImgWidth = 2;
var displayDay = "9";
var displayMonth = "Nov";
var displayYear = "2000";
var prettyMonths = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"];
var $D=document,$=function(n){return document.getElementById(n)};
var trackerVal,curYear = -1,curMonth = -1;
var yearTracker,monthTracker;
function showTrackers(val) {
if (val===trackerVal) return;
var $ipp=$("wm-ipp");
var $y=$("displayYearEl"),$m=$("displayMonthEl"),$d=$("displayDayEl");
if (val) {
$ipp.className="hi";
} else {
$ipp.className="";
$y.innerHTML=displayYear;$m.innerHTML=displayMonth;$d.innerHTML=displayDay;
}
yearTracker.style.display=val?"inline":"none";
monthTracker.style.display=val?"inline":"none";
trackerVal = val;
}
function getElementX2(obj) {
var $e=jQuery(obj);
return (typeof $e=="undefined"||typeof $e.offset=="undefined")?
getElementX(obj):Math.round($e.offset().left);
}
function trackMouseMove(event,element) {
var eventX = getEventX(event);
var elementX = getElementX2(element);
var xOff = Math.min(Math.max(0, eventX - elementX),imgWidth);
var monthOff = xOff % yearImgWidth;
var year = Math.floor(xOff / yearImgWidth);
var monthOfYear = Math.min(11,Math.floor(monthOff / monthImgWidth));
// 1 extra border pixel at the left edge of the year:
var month = (year * 12) + monthOfYear;
var day = monthOff % 2==1?15:1;
var dateString = zeroPad(year + firstYear) + zeroPad(monthOfYear+1,2) +
zeroPad(day,2) + "000000";
$("displayYearEl").innerHTML=year+firstYear;
$("displayMonthEl").innerHTML=prettyMonths[monthOfYear];
// looks too jarring when it changes..
//$("displayDayEl").innerHTML=zeroPad(day,2);
var url = wbPrefix + dateString + '/' + wbCurrentUrl;
$("wm-graph-anchor").href=url;
if(curYear != year) {
var yrOff = year * yearImgWidth;
yearTracker.style.left = yrOff + "px";
curYear = year;
}
if(curMonth != month) {
var mtOff = year + (month * monthImgWidth) + 1;
monthTracker.style.left = mtOff + "px";
curMonth = month;
}
}
function hideToolbar() {
$("wm-ipp").style.display="none";
}
function bootstrap() {
var $spk=$("wm-ipp-sparkline");
yearTracker=$D.createElement('div');
yearTracker.className='yt';
with(yearTracker.style){
display='none';width=yearImgWidth+"px";height=imgHeight+"px";
}
monthTracker=$D.createElement('div');
monthTracker.className='mt';
with(monthTracker.style){
display='none';width=monthImgWidth+"px";height=imgHeight+"px";
}
$spk.appendChild(yearTracker);
$spk.appendChild(monthTracker);
var $ipp=$("wm-ipp");
$ipp&&disclaimElement($ipp);
}
return{st:showTrackers,mv:trackMouseMove,h:hideToolbar,bt:bootstrap};
})();//]]>
</script>
<style type="text/css">
body {
margin-top:0 !important;
padding-top:0 !important;
min-width:800px !important;
}
</style>
<div id="wm-ipp" lang="en" style="display:none;">
<div style="position:fixed;left:0;top:0;width:100%!important">
<div id="wm-ipp-inside">
<table style="width:100%;"><tbody><tr>
<td id="wm-logo">
<a href="/web/" title="Wayback Machine home page"><img src="/static/images/toolbar/wayback-toolbar-logo.png" alt="Wayback Machine" width="110" height="39" border="0" /></a>
</td>
<td class="c">
<table style="margin:0 auto;"><tbody><tr>
<td class="u" colspan="2">
<form target="_top" method="get" action="/web/form-submit.jsp" name="wmtb" id="wmtb"><input type="text" name="url" id="wmtbURL" value="http://entertainment.msn.com/tv/gist/fri02.asp" style="width:400px;" onfocus="this.focus();this.select();" /><input type="hidden" name="type" value="replay" /><input type="hidden" name="date" value="20001109011800" /><input type="submit" value="Go" /><span id="wm_tb_options" style="display:block;"></span></form>
</td>
<td class="n" rowspan="2">
<table><tbody>
<!-- NEXT/PREV MONTH NAV AND MONTH INDICATOR -->
<tr class="m">
<td class="b" nowrap="nowrap">
<a href="/web/20000818044432/http://entertainment.msn.com/tv/gist/fri02.asp" title="18 Aug 2000">AUG</a>
</td>
<td class="c" id="displayMonthEl" title="You are here: 1:18:00 Nov 9, 2000">NOV</td>
<td class="f" nowrap="nowrap">
<a href="/web/20010212142755/http://entertainment.msn.com/tv/gist/fri02.asp" title="12 Feb 2001"><strong>FEB</strong></a>
</td>
</tr>
<!-- NEXT/PREV CAPTURE NAV AND DAY OF MONTH INDICATOR -->
<tr class="d">
<td class="b" nowrap="nowrap">
<a href="/web/20001017145824/http://entertainment.msn.com/tv/gist/fri02.asp" title="14:58:24 Oct 17, 2000"><img src="/static/images/toolbar/wm_tb_prv_on.png" alt="Previous capture" width="14" height="16" border="0" /></a>
</td>
<td class="c" id="displayDayEl" style="width:34px;font-size:24px;" title="You are here: 1:18:00 Nov 9, 2000">9</td>
<td class="f" nowrap="nowrap">
<a href="/web/20001204130300/http://www.entertainment.msn.com/tv/gist/fri02.asp" title="13:03:00 Dec 4, 2000"><img src="/static/images/toolbar/wm_tb_nxt_on.png" alt="Next capture" width="14" height="16" border="0" /></a>
</td>
</tr>
<!-- NEXT/PREV YEAR NAV AND YEAR INDICATOR -->
<tr class="y">
<td class="b" nowrap="nowrap">
1999
</td>
<td class="c" id="displayYearEl" title="You are here: 1:18:00 Nov 9, 2000">2000</td>
<td class="f" nowrap="nowrap">
<a href="/web/20030301014912/http://entertainment.msn.com/tv/gist/fri02.asp" title="1 Mar 2003"><strong>2003</strong></a>
</td>
</tr>
</tbody></table>
</td>
</tr>
<tr>
<td class="s">
<a class="t" href="/web/20001109011800*/http://entertainment.msn.com/tv/gist/fri02.asp" title="See a list of every capture for this URL">20 captures</a>
<div class="r" title="Timespan for captures of this URL">10 May 00 - 5 Dec 04</div>
</td>
<td class="k">
<a href="" id="wm-graph-anchor">
<div id="wm-ipp-sparkline" title="Explore captures for this URL">
<img id="sparklineImgId" alt="sparklines"
onmouseover="__wm.st(1)" onmouseout="__wm.st(0)"
onmousemove="__wm.mv(event,this)"
width="500"
height="27"
border="0"
src="/web/jsp/graph.jsp?graphdata=500_27_1996:-1:000000000000_1997:-1:000000000000_1998:-1:000000000000_1999:-1:000000000000_2000:10:000032010312_2001:-1:010000000000_2002:-1:000000000000_2003:-1:001001001010_2004:-1:000010010001_2005:-1:000000000000_2006:-1:000000000000_2007:-1:000000000000_2008:-1:000000000000_2009:-1:000000000000_2010:-1:000000000000_2011:-1:000000000000_2012:-1:000000000000_2013:-1:000000000000_2014:-1:000000000000_2015:-1:000000000000" />
</div>
</a>
</td>
</tr></tbody></table>
</td>
<td class="r">
<a href="#close" onclick="__wm.h();return false;" style="background-image:url(/static/images/toolbar/wm_tb_close.png);top:5px;" title="Close the toolbar">Close</a>
<a href="http://faq.web.archive.org/" style="background-image:url(/static/images/toolbar/wm_tb_help.png);bottom:5px;" title="Get some help using the Wayback Machine">Help</a>
</td>
</tr></tbody></table>
</div>
</div>
</div>
<script type="text/javascript">__wm.bt();</script>
<!-- END WAYBACK TOOLBAR INSERT -->
<BASEFONT FACE="ARIAL,SANS-SERIF" SIZE=2>
<!-- Custom Codes for the site //-->
<!--Networktable_Top//-->
<!--Network Container Table //-->
<TABLE CELLPADDING=0 CELLSPACING=0 BORDER=0 BGCOLOR="#FFFFFF">
<TR>
<TD VALIGN="TOP">
<!--Network Header Blips//-->
<!--Network Header Blips//-->
<table cellpadding=0 cellspacing=0 border=0>
<tr>
<td height=2><IMG BORDER="0" SRC="/web/20001109011800im_/http://entertainment.msn.com/images/spacer01.gif" WIDTH="1" HEIGHT="1" ALT=""></td>
</tr>
<tr>
<td>
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td width=1 bgcolor=#FFFFFF></td>
<td width=92 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/7/" class=tL><b>MSN Home</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=72 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/8/" class=tL><b>Hotmail</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=100 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/A/" class=tL><b>Web Search</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=81 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/9/" class=tL><b>Shopping</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=62 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/C/" class=tL><b>Money</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=116 align=center><font size=2 face=verdana,sans-serif color=#000000><a href="/web/20001109011800/http://go.msn.com/A5/B/" class=tL><b>People & Chat</b></a></font></td>
<td width=1 bgcolor=#000000></td>
<td width=77 align=right>
<img src=/web/20001109011800im_/http://entertainment.msn.com/images/spacer01.gif WIDTH=67 HEIGHT=19 BORDER=0 HSPACE=10>
</td>
<td width=1 bgcolor=#000000></td>
</tr>
</table>
</td>
</tr>
<tr>
<td height=2><IMG BORDER="0" SRC="/web/20001109011800im_/http://entertainment.msn.com/images/spacer01.gif" WIDTH="1" HEIGHT="1" ALT=""></td>
</tr>
</table>
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td height=60> <A HREF="/web/20001109011800/http://go.msn.com/A5/0/"><IMG SRC="/web/20001109011800im_/http://entertainment.msn.com/images/logo.gif" WIDTH=140 HEIGHT=60 BORDER=0 ALT="go to MSN.com" align=middle></A> </td>
<td height=60 width=468><A HREF="/web/20001109011800/http://ads.msn.com/ads/redirect.dll/CID=0009e0c897844cf000000000/AREA=ENTGEN?image=http://ads.msn.com/ads/ENTGEN/00482CP0096_LG.gif" TARGET="_top"><IMG SRC="/web/20001109011800im_/http://ads.msn.com/ads/ENTGEN/00482CP0096_LG.gif" ALT="Win a Hummer for a Weekend!" BORDER=0 WIDTH=468 HEIGHT=60></A></td>
</tr>
<tr>
<td height=1 colspan=2></td>
</tr>
</table>
<!--PlaceHolder //-->
<!-- Header 4// -->
<table cellpadding=0 cellspacing=0 border=0 width=608 bordercolor=#FFFFFF>
<tr>
<td width=300 height=32 align=left bgcolor=#FFFFFF><font size=5 face=arial,sans-serif color=#000000><b><a href="/web/20001109011800/http://entertainment.msn.com/" class=ttL> Entertainment</a></b></font></td>
<td width=100% align=left valign=middle bgcolor=#FFFFFF><IMG BORDER="0" SRC="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" WIDTH="1" HEIGHT="1" ALT=""></td>
</tr>
</table>
<!--Network ToolBar Functions//-->
<!--Network ToolBar Functions//-->
<script language="Javascript">
<!--
function mOvr(src,clrOver){
if (!src.contains(event.fromElement)){
src.style.cursor = 'hand'; src.bgColor = clrOver;
}
}
function mOut(src,clrIn){
if (!src.contains(event.toElement)){
src.style.cursor = 'default';
src.bgColor = clrIn;
}
}
function mClk(src){
if(event.srcElement.tagName=='TD'){
src.children.tags('A')[0].click();
}
}
//-->
</script>
<!--Network ToolBar//-->
<!--Network ToolBar//-->
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#336699 width=608 height=17>
<tr>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Home </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/books/books.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Books </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/celebs/celebs.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Celebs </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/movies/movies.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Movies </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/music/music.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Music </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="76" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/shopping/default.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Shopping </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/tv/tv.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> TV </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/news/news.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> News </font></a></td>
</tr>
</table>
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#FFFFFFF width=608 height=2>
<tr valign="top" height="2"><td><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" border="0" width="1" height="2"></td></tr></table>
<!--Misc PlaceHolder//-->
<!--PlaceHolder //-->
<!-- Main Content Start -->
<!-- tv section begin -->
<table border="0" cellspacing="0" cellpadding="0" height="777" width="608">
<tr colspan="7" valign="top">
<!-- Left Gutter of the Left Column -->
<td width="4" bgcolor="#CCCCFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Left Column -->
<td width="139" bgcolor="#CCCCFF" height="100%" valign="top" align="left" >
<!-- Left Browse Bar Start -->
<!-- Left Browse Start -->
<table>
<p align="left">
<font size="6" face="Arial" color="#003366">
<strong>tv</strong>
</font><br>
<!-- section links -->
<p align="left">
<font size="4" face="Arial" color="#003366">
<strong>Today on TV</strong></font>
<br>
<font size="2" face="Arial" color="#9B0000">
• <a href="/web/20001109011800/http://go.msn.com/A5/D/5.asp?target=http://msn.gist.com/tv/index.jsp?ref=msn">All shows</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/tv/gist/TalkShow.asp">Talk shows</a><br><br>
</font>
<!--
• <a href="">Movies on TV</a><br>
• <a href="">Sports on TV</a><br>
</font>
</P>
--->
</table>
<!-- Brick Feed Box -->
<!-- begin Pipe email notification-->
<!-- include file ="email.jsp" -->
<!-- end Pipe email notification-->
<table width="139" border="0" cellpadding="0" cellspacing="0">
<tr>
<td width="139" colspan="5">
<a href="/web/20001109011800/http://www.gist.com/tv/index.jsp?ref=msn&origin=msnbrick" target="_top">
<img alt="go to gist.com" border="0" width="139" height="45" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/gistbox_top.gif"></a></td>
</tr>
<tr valign="top" align="left">
<td width="2" bgcolor="#000066">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="3"></td>
<td width="4">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="4"></td>
<td width="127" align="middle" valign="top">
<img src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="127" height="5"><br>
<a href="/web/20001109011800/http://clubs.gist.com/tvclubs/fanclub.jsp?fanclub=buffy&ref=msn&origin=msnbrick"><img src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/tv/images/msn_buffy.jpg" height=103 width=100 border=1 alt="JA"></a>
<font color="#0000ff" face="arial, helvetica" size="2">
<a href="/web/20001109011800/http://clubs.gist.com/tvclubs/fanclub.jsp?fanclub=buffy&ref=msn&origin=msnbrick"><br clear=all>
<b>Talk to Us!</b></a></font><br>
<font face="arial, helvetica" size="1">Be a <i>Buffy the Vampire Slayer</i> Fan of the Month!</font><br>
<font color="#0000ff" face="arial, helvetica" size="2">
<a href="/web/20001109011800/http://www.gist.com/tv/soaps/soaps.jsp?ref=msn&origin=msnbrick"><br clear=all>
<b>Soap Updates</b></a></font><br>
<font face="arial, helvetica" size="1">What Happened on Your Favorite Soap Today?</font><br>
<font color="#0000ff" face="arial, helvetica" size="2">
<a href="/web/20001109011800/http://www.gist.com/tv/hottopic.jsp?name=xfiles21&ref=msn&orgin=msnbrick"><br clear=all>
<b>Hot Topics</b></a></font><br>
<font face="arial, helvetica" size="1">Can Doggett Keep <i>The X-Files</i> Alive?</font><br>
</td>
<td width="4">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="2"></td>
<td width="2" bgcolor="#000066">
<img height="1" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/white_space.gif" width="3"></td>
</tr>
<tr>
<td width="139" colspan="5">
<a href="/web/20001109011800/http://www.gist.com/tv/index.jsp?ref=msn&origin=msnbrick" target="_top"><img alt="style" border="0" width="139" height="13" src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/cobrand/images/gistbox_bot2.gif"></a></td>
</tr>
</table>
<!-- where to buy section begin -->
<!-- "best buys" -->
<p><strong><font color="#003366" face="Arial" size="4">best buys</font></strong><br><font size="2" face="Arial" color="#9B0000">
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=televisions&catname=electronics">TVs</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=sattv&catname=electronics">Satellite TV</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=vcrs&catname=electronics">VCRs</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=dvdplayer&catname=electronics">DVD players</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=hometheater&catname=electronics">Home theater</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=camcorders&catname=electronics">Camcorders</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/subcategory.asp?subcatname=videogear&catname=electronics">Video gear</a><br>
• <a href="/web/20001109011800/http://entertainment.msn.com/shopping/category.asp?catname=electronics">The complete electronics guide</a><br>
</font><br></P>
<!-- Contact us -->
<p align="left">
<font face="Arial" size="2" color="#9B0000">
• <a href="/web/20001109011800/http://entertainment.msn.com/help/help.asp"><strong>Contact us</strong></a>
</font>
</p>
<!-- Left Browse Bar End -->
<!-- reserved section begin -->
<!-- Reserved Blip -->
</td>
<!-- Right Gutter of the Left Browse Bar -->
<td width="4" bgcolor="#CCCCFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Left Gutter of the Center Column -->
<td width="7" bgcolor="#FFFFFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Left Browse Bar End -->
<!-- Begining of Center -->
<td width="320" bgcolor="#FFFFFF" height="100%" valign="top" align="left" >
<table border="0" cellspacing="0" cellpadding="0">
<tr>
<td width="100%" valign="top" align="left">
<!--center begin-->
<table border="0" cellspacing="0" cellpadding="0">
<tr>
<td width=100% valign="top" align="left">
<p align="left"><strong><font face="Arial" size="3" color="#5F5F9F">GIST TV news</font>
<font color="#9B0000" face="Arial" size="5"><br>
<i>90210</i>'s Thiessen Snags ABC Pilot
</font></strong><br>
</p>
<div align="left">
<table border="0" cellpadding="0" cellspacing="5" width="125" align="left">
<tr>
<td>
<!-- photo -->
<img src="/web/20001109011800im_/http://a296.g.akamai.net/f/296/501/1d/www.gist.com/tv/general/images/4_tiffanithiessen.gif" width=132 height=115 alt="picture of Tiffani Thiessen"><BR><font face=verdana,helvetica,arial,times color=#37375F size=1>Tiffani Thiessen</font></td>
</tr>
</table></div>
<font face="Arial,helvetica" size="2">
Nov 2, 2000
—
Tiffani Thiessen has a new gig. The former <i>Beverly Hills, 90210</i> regular has signed up to star in an ABC pilot tentatively called <i>Something About Tiffani</i>, says <i>Variety</i>. Thiessen will play a beer-swilling, sharp-tongued woman who leaves her fiancé at the altar, goes to Italy for six months and returns home minus 70 pounds. Her three male college friends/housemates, who'd always thought of her as just one of guys, are blown away by her new sexy appearance.
<p>
"They're expecting the old Tiffani, and this hot babe walks in," said Mark Blutman, who will create, write and executive-produce the pilot with Howard Busgang. "Inside, she's still one of the guys: She's salty, she's ballsy. The dynamics of the house are thrown completely off the chart."
<p>
The show is penciled in on the network's fall 2001 schedule.
<p>
In addition to <i>90210</i>, Thiessen's résumé includes the ABC sitcom <i>Two Guys and a Girl</i>, NBC's Saturday morning teen comedy <i>Saved by the Bell</i> and a number of TV movies. —<i>Jenny Higgons, <nobr>Gist TV Staff</nobr></i><p></font><p>
<font face=verdana,helvetica,arial,times color=#37375F size=1>Photo courtesy of Fox</font>
<tr>
<td>
<br><br>
</td>
</tr>
</td>
</tr>
</table>
</td>
</td>
</tr>
</table>
<!-- Right Gutter of the Center Column -->
<!-- Partner Column -->
<!-- Right Gutter of the Center Column -->
<td width="6" bgcolor="#FFFFFF"><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" width="1" height="1"></td>
<!-- Headlines -->
<td width="137" bgcolor="#FFFFFF" height="100%" valign="top" align="left" >
<table border="0" cellspacing="0" cellpadding="0">
<!--Insert "top headlines" Here -->
<table cellpadding="1">
<tr valign="top" align="right">
<td width="100%" valign="top" bgcolor="#CCCCFF" border="5">
<p align="left">
<font face="Arial" size="3" color="#003366"><strong><center>top headlines</center></strong></font>
<table width="138">
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/jseinfeld.asp"><font SIZE="2" face="Arial" color="#000080">Seinfeld's baby girl </font></a></td>
</tr>
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/cmanheim.asp"><font SIZE="2" face="Arial" color="#000080">Oh, baby! Camryn Manheim expecting</font></a></td>
</tr>
</table>
<table width="138">
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/tv/gist/wed03.asp"><font SIZE="2" face="Arial" color="#000080"><i>Frasier'</i>s Dr. Laura spoof
M.I.A.</font></a></td>
</tr>
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34" valign="top"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/oj.asp"><font SIZE="2" face="Arial" color="#000080">Judge
boots<br>
O.J. suit</font></a></td>
</tr>
<tr>
<td valign="top" height="34"><font face="Arial" size="2" color="#990000">•</font></td><td height="34"><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/tv/gist/wed02.asp"><font SIZE="2" face="Arial" color="#000080"><i>Survivor</i>'s Richard undergoes lipo</font></a></td>
</tr>
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1108/trailers.asp"><font SIZE="2" face="Arial"><font color="#000080">Theater owners take out trailer trash</font> </font></a></td>
</tr>
</table>
<table width="138">
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/rosie.asp"><font SIZE="2" face="Arial" color="#000080">Rosie's
political ultimatum</font></a></td>
</tr>
</table>
<table width="138">
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/grinch.asp"><font face="Arial" size="2" color="#000080">Mrs.
Seuss: Whoville is not Pooh-ville</font></a></td>
</tr>
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/madonna.asp"><font face="Arial" size="2" color="#000080">Madonna
makes some live <i>Music</i></font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1107/leo.asp"><font SIZE="2" face="Arial" color="#000080">Leo
running late for next film</font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/music/news/tue01.asp"><font SIZE="2" face="Arial" color="#000080">Pixies
to reunite -- partially?</font></a></td>
</tr>
</table>
<table width="138">
<tr>
<td valign="top"><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1106/scooby.asp"><font face="Arial" size="2" color="#000080">Gellar,
Prinze may help <i>Scooby</i></font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/CSspecial/box_office/"><font SIZE="2" face="Arial" color="#000080">Devilish
<i>Angels</i> soars at box office</font></a></td>
</tr>
<tr valign="top">
<td><font face="Arial" size="2" color="#990000">•</font></td><td><a style="COLOR: #000099" href="/web/20001109011800/http://entertainment.msn.com/news/eonline/1106/mcrichton.asp"><font SIZE="2" face="Arial" color="#000080">Crichton
gets medieval on PCs</font></a></td>
</tr>
</table>
<table width="138">
<tr valign="top">
<td cellspacing="0" cellpadding="0">
<hr color="#000000"> <center>
<font face="Arial" size="2" color="#000000ss">
This site is best viewed with<br>
MSN Explorer.</font><br>
<a href="/web/20001109011800/http://explorer.msn.com/">
<img src="/web/20001109011800im_/http://entertainment.msn.com/images/animated_highlight_tm_free.gif" border="0"></a></center>
</td>
</tr>
</table>
<!-- vertical space between boxes -->
<tr height="3"><td height="3" width="100%">
</td></tr>
</table>
</td>
</TR>
</TD>
</tr>
</table>
<!-- Reserved Blip -->
<!-- Reserved Blip -->
<!-- Main Content End -->
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#FFFFFFF width=608 height=2>
<tr valign="top" height="2"><td><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" border="0" width="1" height="2"></td></tr></table>
<!--Bottom Navigation bar from the network template//-->
<!--Repeated Toolbar Blip//-->
<!--Network ToolBar//-->
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#336699 width=608 height=17>
<tr>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Home </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/books/books.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Books </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/celebs/celebs.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Celebs </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/movies/movies.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Movies </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/music/music.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Music </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="76" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/shopping/default.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> Shopping </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/tv/tv.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> TV </font></a></td>
<td nowrap width=1 bgcolor=white></td>
<td nowrap width="75" align="center" onmouseover="mOvr(this,'#6699CC');" onmouseout="mOut(this,'#336699');" onclick="mClk(this);"><a href="/web/20001109011800/http://entertainment.msn.com/news/news.asp" class=t> <font face=arial,sans-serif size=2 color=#FFFFFF class=t> News </font></a></td>
</tr>
</table>
<table cellpadding=0 cellspacing=0 border=0 bgcolor=#FFFFFFF width=608 height=2>
<tr valign="top" height="2"><td><img src="/web/20001109011800im_/http://entertainment.msn.com/images/spacer.gif" border="0" width="1" height="2"></td></tr></table>
<!--Network Footers//-->
<!--Footer 1 //-->
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td width=208 valign=top align=left>
<A HREF="/web/20001109011800/http://go.msn.com/A5/1/"><IMG SRC="/web/20001109011800im_/http://entertainment.msn.com/images/logo.gif" WIDTH=140 HEIGHT=60 BORDER=0 ALT="go to MSN.com" align=middle></A> <table cellpadding=0 cellspacing=0 border=0>
<tr>
<td width=20> </td>
<td align=left valign=top>
<font size=2 face=arial,sans-serif class=rL>Return to <b><a href="/web/20001109011800/http://entertainment.msn.com/" class=rL>Entertainment</a></b></font></td>
<td width=20> </td>
</tr>
</table></td>
<td width=190 valign=top align=left>
<font size=2 face=arial,sans-serif><b>Other Links:</b></font>
<hr size=1 color=#000000 noshade>
<font size=1 face=arial,sans-serif>
<a href="/web/20001109011800/http://go.msn.com/A5/2/WOMEN.ASP" class=fL>Women</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/CHAT.ASP" class=fL>Chat</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/FREEGAMES.ASP" class=fL>Free Games</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/DOWNLOADS.ASP" class=fL>Downloads</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/RESEARCH.ASP" class=fL>Research & School</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/2/MORE.ASP" class=fL>More...</a><br><br>
</font>
</td>
<td width=20></td>
<td width=190 valign=top align=left>
<font size=2 face="arial,sans-serif"><b>Special Features:</b></font>
<hr size=1 color=#000000 noshade>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it1.asp?target=http://moneycentral.msn.com/insure/insur.asp?itype=auto" class="fL">Getting scammed on auto insurance?</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it2.asp?target=http://entertainment.msn.com/special/fall_entertainment_guide/default.asp" class="fL">What's HOT and NOT this Fall</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it3.asp?target=http://cagle.slate.msn.com/" class="fL">Looking for a new perspective?</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it4.asp?target=http://preview.msn.com/" class="fL">How to g
Traceback (most recent call last):
File "<pyshell#9>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#8>", line 68, in take_measurements
universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
File "<pyshell#8>", line 17, in tokens_to_hist_from_universe
parsed_goldhtml = lxml.html.parse(data_file,
AttributeError: 'module' object has no attribute 'html'
et more out of the web</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it5.asp?target=http://chat.msn.com/features/categories/music.asp" class="fL">Add music to your favorite chat!</a><br>
<a href="/web/20001109011800/http://go.msn.com/A5/3/it10.asp?target=http://update.msn.com/quicklinks/default.asp" class="fL">More...</a><br><br>
</td>
</tr>
</table>
<!--Network Footers Tems and Conditions//-->
<!--Network Footers Tems and Conditions//-->
<table cellpadding=0 cellspacing=0 border=0 width=608>
<tr>
<td height=25 bgcolor=#000000 align=center class=fTOU >
<font size=1 face=arial,sans-serif color=#FFFFFF> ©2000 Microsoft Corporation. All rights reserved. </font>
<a href="/web/20001109011800/http://go.msn.com/A5/4/"><font size=1 face=arial,sans-serif color=#FFFFFF>Terms of Use</font></a>
<a href="/web/20001109011800/http://go.msn.com/A5/5/"><font size=1 face=arial,sans-serif color=#FFFFFF>Advertise</font></a>
<a href="/web/20001109011800/http://go.msn.com/A5/6/"><font size=1 face=arial,sans-serif color=#FFFFFF>TRUSTe Approved Privacy Statement</font></a>
</td>
</tr>
</table>
<!--Network Template Sidebar script//-->
<!--Network Template Sidebar Cell//-->
</TD>
<TD valign="top">
<iframe id=sidebar style="display: none" src="/web/20001109011800if_/http://arc1.msn.com/ADSAdClient31.dll?GetAd?PG=PROENT?SC=D1" border=0 frameborder=0 scrolling=no marginheight=0 marginwidth=0 width=160 height=600>
<a href="/web/20001109011800/http://arc1.msn.com/Clicker/ADSClicker31.dll?Redirect?PG=PROENT?AN=1.0">
<img src="/web/20001109011800im_/http://arc1.msn.com/ADSAdClient31.dll?GetImage?PG=PROENT?SC=D1?AN=1.0" width=160 height=600 border=0></a>
</iframe>
</TD>
<!--Network Template Sidebar Cell//-->
<!--Network Template Sidebar script//-->
<script language="Javascript">
<!--
function showHideSidebar() {
if(document.body) var iWidth = document.body.clientWidth;
if(document.all) document.all.sidebar.style.display = (iWidth > 753 ? '' :'none');
}
showHideSidebar();
function errortrap(msg,url,line){
return true;
}
onerror=errortrap;
//-->
</script>
<script language="javascript" type="text/javascript" for="window" event="onresize">
<!--
showHideSidebar();
function errortrap(msg,url,line){
return true;
}
onerror=errortrap;
//-->
</script>
<!--Network Template table bottom //-->
<!--Network Template Table bottom //-->
</TD>
</TR>
</TABLE>
</TD>
</TR>
</TABLE>
</BODY>
</HTML>
<!--
FILE ARCHIVED ON 1:18:00 Nov 9, 2000 AND RETRIEVED FROM THE
INTERNET ARCHIVE ON 4:04:57 Jan 8, 2015.
JAVASCRIPT APPENDED BY WAYBACK MACHINE, COPYRIGHT INTERNET ARCHIVE.
ALL OTHER CONTENT MAY ALSO BE PROTECTED BY COPYRIGHT (17 U.S.C.
SECTION 108(a)(3)).
-->
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
Traceback (most recent call last):
File "<pyshell#11>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#10>", line 72, in take_measurements
val['measurements'] = calc_praf(goldstd,content,universe)
NameError: global name 'calc_praf' is not defined
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
... def calc_praf(goldstd,predicted,universe):
...
... TP = predicted & goldstd
...
... FP = predicted - goldstd
...
... FN = (universe - predicted) & goldstd
...
... TN = (universe - predicted) & (universe - goldstd)
...
... precision = (histsum(TP)*1.0)/histsum(TP+FP)
...
... recall = (histsum(TP)*1.0)/histsum(TP+FN)
...
... accuracy = (histsum(TP+TN)*1.0)/histsum(TP+FP+FN+TN)
... try:
... f1 = 2 * (((precision*recall)*1.0)/(precision+recall))
... except ZeroDivisionError:
... f1 = 0
...
...
... return {'p':precision,'r':recall,'a':accuracy,'f1':f1}
...
...
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
Traceback (most recent call last):
File "<pyshell#13>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#12>", line 97, in take_measurements
val['measurements'] = calc_praf(goldstd,content,universe)
File "<pyshell#12>", line 18, in calc_praf
precision = (histsum(TP)*1.0)/histsum(TP+FP)
NameError: global name 'histsum' is not defined
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
... def histsum(hist): return sum(hist.values())
...
...
... def calc_praf(goldstd,predicted,universe):
...
... TP = predicted & goldstd
...
... FP = predicted - goldstd
...
... FN = (universe - predicted) & goldstd
...
... TN = (universe - predicted) & (universe - goldstd)
...
... precision = (histsum(TP)*1.0)/histsum(TP+FP)
...
... recall = (histsum(TP)*1.0)/histsum(TP+FN)
...
... accuracy = (histsum(TP+TN)*1.0)/histsum(TP+FP+FN+TN)
... try:
... f1 = 2 * (((precision*recall)*1.0)/(precision+recall))
... except ZeroDivisionError:
... f1 = 0
...
...
... return {'p':precision,'r':recall,'a':accuracy,'f1':f1}
...
...
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt
Traceback (most recent call last):
File "<pyshell#15>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#14>", line 94, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#14>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#14>", line 94, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013.txt'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013','r')
Traceback (most recent call last):
File "<pyshell#16>", line 1, in <module>
o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013','r')
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt','r')
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt
Traceback (most recent call last):
File "<pyshell#18>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#14>", line 94, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#14>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#14>", line 94, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt'
>>> len(linkquotatest)
0: 2
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt','r')
Traceback (most recent call last):
File "<pyshell#20>", line 1, in <module>
o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt','r')
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt','r')
Traceback (most recent call last):
File "<pyshell#21>", line 1, in <module>
o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt','r')
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt.txt','r')
>>>
==================== New Session ====================
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
... def histsum(hist): return sum(hist.values())
...
...
... def calc_praf(goldstd,predicted,universe):
...
... TP = predicted & goldstd
...
... FP = predicted - goldstd
...
... FN = (universe - predicted) & goldstd
...
... TN = (universe - predicted) & (universe - goldstd)
...
... precision = (histsum(TP)*1.0)/histsum(TP+FP)
...
... recall = (histsum(TP)*1.0)/histsum(TP+FN)
...
... accuracy = (histsum(TP+TN)*1.0)/histsum(TP+FP+FN+TN)
... try:
... f1 = 2 * (((precision*recall)*1.0)/(precision+recall))
... except ZeroDivisionError:
... f1 = 0
...
...
... return {'p':precision,'r':recall,'a':accuracy,'f1':f1}
...
...
... def listsubdir(directory):
... return [os.path.join(d, f) for d in directory for f in os.listdir(d)
... if os.path.isdir(os.path.join(d, f))]
...
...
... def tokens_to_hist_from_universe(data_filepath):
...
... with open(data_filepath,'r') as data_file:
...
... try:
... parsed_goldhtml = lxml.html.parse(data_file,
... lxml.html.HTMLParser(encoding="utf-8"))
... except:
... print(str(data_file.read()))
... raise
... tokenized_content = collections.Counter()
...
... content = "".join(parsed_goldhtml.xpath('//text()'))
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_goldstd(test_filepath):
...
... with open(os.path.abspath(test_filepath), 'r') as f:
...
...
... content = str(f.read())
...
... tokenized_content = collections.Counter()
...
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
...
... def tokens_to_hist_extractor(extract, data_filepath):
...
... try:
... content = extract(data_filepath)
... except Exception:
... print(data_filepath)
... raise
...
...
... tokenized_content = collections.Counter()
... tokenized_content.update(re.split(r'\W+', content))
...
... return tokenized_content
...
...
... def take_measurements(prep_data):
...
... for key, val in prep_data.iteritems():
...
... content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
...
... universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
...
... goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
...
... val['measurements'] = calc_praf(goldstd,content,universe)
...
... return prep_data
...
... def prepare_for_measurements(gold_directory,
... test_directory=None,
... goldfile_ext='.txt',
... testfile_ext='.html',
... universe_ext='.html',
... pickle_output_name = None):
... '''Given a directory, process the files (gold standard) in the following
... steps:
... 1. Build dictionary of file names to file paths to gold std files and test file
...
... 2. For each file name, get associated "universe" values (all possible outcomes W)
...
... 3. For each file name, get goldstd histogram (G)
...
... 4. For each file name, get extractor's output (C)
... '''
...
... filenames = set([re.sub(("\\"+ goldfile_ext+"|"+"\\"+testfile_ext+"|"+"\\"+universe_ext),"",name)
... for name in os.listdir(gold_directory)])
...
...
...
... if test_directory:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
... else:
... to_test_pkg = { name:
... {
... 'domain': os.path.split(os.path.split(gold_directory)[0])[1],
... 'year': os.path.split(gold_directory)[1],
... 'goldpath':os.path.join(gold_directory, name+goldfile_ext),
... 'universepath':os.path.join(gold_directory, name+universe_ext),
... 'testpath':os.path.join(test_directory, name+testfile_ext)
... } for name in filenames }
...
...
... return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt
Traceback (most recent call last):
File "<pyshell#1>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#0>", line 94, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#0>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#0>", line 94, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt'
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt
Traceback (most recent call last):
File "<pyshell#2>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#0>", line 94, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#0>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#0>", line 94, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview.txt'
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
Traceback (most recent call last):
File "<pyshell#3>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#0>", line 94, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#0>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#0>", line 94, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\thenation.com\\2010\\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt'
>>> len(linkquotatest)
0: 14
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition.txt
Traceback (most recent call last):
File "<pyshell#5>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#0>", line 94, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#0>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#0>", line 94, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\www.forbes.com\\2010\\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition.txt'
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'


def domain_year(folder):
    '''Return the trailing (domain, year) path components of *folder*.'''
    parent, year = os.path.split(os.path.normpath(folder))
    return os.path.basename(parent), year


# Index the gold folders by (domain, year) so every test folder is scored
# against its own gold folder.  Pairing the two listings by position breaks
# as soon as one tree lists in a different order or lacks a folder.
goldset_folders = listsubdir(listsubdir([test_dir]))
gold_by_key = dict((domain_year(folder), folder) for folder in goldset_folders)
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:
    testset_folders = listsubdir(listsubdir([wcbe_test]))
    linkquotatest = []

    for test_folder in testset_folders:
        # A test folder without a gold counterpart raises KeyError here
        # instead of silently being scored against the wrong gold folder.
        gold_folder = gold_by_key[domain_year(test_folder)]
        prep_data = prepare_for_measurements(gold_folder, test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # One results file per extractor folder; the with-block makes sure the
    # pickle is flushed and the handle closed before the next iteration.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
Traceback (most recent call last):
File "<pyshell#6>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#0>", line 94, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#0>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#0>", line 94, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\www.forbes.com\\2010\\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt'
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    '''Return the sum of all counts stored in the histogram *hist*.'''
    counts = hist.values()
    return sum(counts)
...
...
def calc_praf(goldstd, predicted, universe):
    '''Score an extractor's token histogram against the gold standard.

    All three arguments are combined with the multiset operators ``&`` and
    ``-`` and must provide ``values()``; the callers in this session pass
    collections.Counter objects.

    goldstd   -- histogram of the tokens that should have been extracted
    predicted -- histogram of the tokens the extractor produced
    universe  -- histogram of every token in the source document

    Returns a dict with the keys 'p' (precision), 'r' (recall),
    'a' (accuracy) and 'f1'.  A ratio whose denominator is zero is reported
    as 0.0 instead of raising ZeroDivisionError.
    '''

    def _count(hist):
        # Total number of token occurrences in a histogram.
        return sum(hist.values())

    def _ratio(numerator, denominator):
        # An empty prediction, gold standard or universe gives a zero
        # denominator; score it as 0.0 rather than aborting the whole run.
        if not denominator:
            return 0.0
        return (numerator * 1.0) / denominator

    not_predicted = universe - predicted

    tp = _count(predicted & goldstd)
    fp = _count(predicted - goldstd)
    # The complement is taken relative to the universe, so only gold tokens
    # that also occur in the universe are counted as missed.
    fn = _count(not_predicted & goldstd)
    tn = _count(not_predicted & (universe - goldstd))

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    accuracy = _ratio(tp + tn, tp + fp + fn + tn)
    f1 = 2 * _ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    '''Return the immediate sub-directories of every directory given.

    directory -- an iterable of directory paths (not a single path string,
                 despite the singular name)

    Returns a list of joined paths, grouped in the order of *directory* and
    sorted by name within each parent.  os.listdir() yields entries in
    arbitrary order, and the driver code lines up gold and test folders
    from two separate listings, so the order has to be reproducible.
    '''
    return [os.path.join(parent, entry)
            for parent in directory
            for entry in sorted(os.listdir(parent))
            if os.path.isdir(os.path.join(parent, entry))]
...
...
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" histogram: every token of an HTML document.

    data_filepath -- path of the HTML file; it is parsed with lxml's HTML
                     parser using a fixed utf-8 encoding

    Returns a collections.Counter over the pieces obtained by splitting the
    document's text nodes on runs of non-word characters.

    A parse error is re-raised after the file content has been printed.
    '''
    with open(data_filepath, 'r') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # The parser may already have consumed part of the stream, so
            # rewind to dump the whole document, not just the unread tail.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    # Join the text nodes with a space: with an empty separator the last
    # word of one element and the first word of the next fuse into one token.
    content = " ".join(parsed_goldhtml.xpath('//text()'))

    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Build a token histogram from the plain-text file at *test_filepath*.

    The file is read whole and split on runs of non-word characters.  The
    returned collections.Counter maps each piece to its number of
    occurrences; a leading or trailing separator produces an empty-string
    piece, which is counted like any other token.
    '''
    absolute_path = os.path.abspath(test_filepath)
    with open(absolute_path, 'r') as handle:
        text = str(handle.read())

    pieces = re.split(r'\W+', text)
    return collections.Counter(pieces)
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Run *extract* on a test file and return a histogram of its tokens.

    extract       -- callable taking a file path and returning the
                     extracted text as a string
    data_filepath -- path of the extractor output to read

    The path is tried as given and then with '.txt' appended; the first
    candidate accepted by *extract* is used.  If every candidate fails,
    *data_filepath* is printed and the last error is re-raised.

    Returns a collections.Counter over the text split on runs of non-word
    characters.
    '''
    candidates = (data_filepath, data_filepath + '.txt')
    last = len(candidates) - 1

    for position, candidate in enumerate(candidates):
        try:
            content = extract(candidate)
        except Exception:
            # Give up only after the final candidate.  The earlier version
            # fell through to a trailing print/raise even when the fallback
            # had succeeded, so the fallback could never be used.
            if position == last:
                print(data_filepath)
                raise
        else:
            break

    return collections.Counter(re.split(r'\W+', content))
...
...
def take_measurements(prep_data):
    '''Attach precision/recall/accuracy/F1 scores to every entry of *prep_data*.

    prep_data -- dict as built by prepare_for_measurements(); every value
                 must provide the keys 'testpath', 'universepath' and
                 'goldpath'

    Each value gains a 'measurements' key holding the dict returned by
    calc_praf().  The same, mutated *prep_data* object is returned.
    '''

    def read_text(path):
        # Close the handle deterministically; the former open(x).read()
        # lambda left every file to be closed by the garbage collector.
        with open(path) as handle:
            return str(handle.read())

    for val in prep_data.values():
        content = tokens_to_hist_extractor(read_text,
                                           os.path.abspath(val['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(val['universepath']))
        goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))

        val['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every document in *gold_directory* to the files needed to score it.

    gold_directory     -- folder laid out as <domain>/<year> holding the
                          gold-standard files and the source ("universe")
                          files
    test_directory     -- folder holding the extractor output; when omitted
                          the gold directory itself is used
    goldfile_ext       -- extension of the gold-standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the source documents
    pickle_output_name -- accepted for compatibility; currently unused

    Returns a dict keyed by document name (file name minus one known
    extension).  Each value is a dict with the keys 'domain', 'year',
    'goldpath', 'universepath' and 'testpath'.  Nothing is read here; the
    paths are only assembled.
    '''
    # Longest first, so an extension that is a suffix of another one cannot
    # shadow it.
    known_exts = sorted(set(ext for ext in (goldfile_ext, testfile_ext,
                                            universe_ext) if ext),
                        key=len, reverse=True)

    def strip_known_ext(name):
        # Remove one known extension from the END of the name only.  The
        # former unanchored regex also deleted '.txt' / '.html' from the
        # middle of a file name and escaped only the leading character.
        for ext in known_exts:
            if name.endswith(ext):
                return name[:-len(ext)]
        return name

    filenames = set(strip_known_ext(name)
                    for name in os.listdir(gold_directory))

    # Both branches of the old if/else were identical, so a missing
    # test_directory crashed inside os.path.join(None, ...).
    # NOTE(review): falling back to the gold directory is an assumption
    # about the intended default -- confirm against the callers.
    if not test_directory:
        test_directory = gold_directory

    # normpath drops a trailing separator, which would otherwise make
    # 'year' empty and shift the year into 'domain'.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    to_test_pkg = {}
    for name in filenames:
        to_test_pkg[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_directory, name + testfile_ext),
        }

    return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'


def domain_year(folder):
    '''Return the trailing (domain, year) path components of *folder*.'''
    parent, year = os.path.split(os.path.normpath(folder))
    return os.path.basename(parent), year


# Index the gold folders by (domain, year) so every test folder is scored
# against its own gold folder.  Pairing the two listings by position breaks
# as soon as one tree lists in a different order or lacks a folder.
goldset_folders = listsubdir(listsubdir([test_dir]))
gold_by_key = dict((domain_year(folder), folder) for folder in goldset_folders)
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:
    testset_folders = listsubdir(listsubdir([wcbe_test]))
    linkquotatest = []

    for test_folder in testset_folders:
        # A test folder without a gold counterpart raises KeyError here
        # instead of silently being scored against the wrong gold folder.
        gold_folder = gold_by_key[domain_year(test_folder)]
        prep_data = prepare_for_measurements(gold_folder, test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # One results file per extractor folder; the with-block makes sure the
    # pickle is flushed and the handle closed before the next iteration.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport.txt
Traceback (most recent call last):
File "<pyshell#8>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#7>", line 102, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#7>", line 82, in tokens_to_hist_extractor
content = extract(data_filepath+'.txt')
File "<pyshell#7>", line 102, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\www.foxnews.com\\2010\\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport.txt.txt'
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'


def domain_year(folder):
    '''Return the trailing (domain, year) path components of *folder*.'''
    parent, year = os.path.split(os.path.normpath(folder))
    return os.path.basename(parent), year


# Index the gold folders by (domain, year) so every test folder is scored
# against its own gold folder.  Pairing the two listings by position breaks
# as soon as one tree lists in a different order or lacks a folder.
goldset_folders = listsubdir(listsubdir([test_dir]))
gold_by_key = dict((domain_year(folder), folder) for folder in goldset_folders)
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:
    testset_folders = listsubdir(listsubdir([wcbe_test]))
    linkquotatest = []

    for test_folder in testset_folders:
        # A test folder without a gold counterpart raises KeyError here
        # instead of silently being scored against the wrong gold folder.
        gold_folder = gold_by_key[domain_year(test_folder)]
        prep_data = prepare_for_measurements(gold_folder, test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # One results file per extractor folder; the with-block makes sure the
    # pickle is flushed and the handle closed before the next iteration.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
Traceback (most recent call last):
File "<pyshell#9>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#7>", line 102, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#7>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#7>", line 102, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
>>> o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt','r')
Traceback (most recent call last):
File "<pyshell#10>", line 1, in <module>
o = open('c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt','r')
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    '''Return the sum of all counts stored in the histogram *hist*.'''
    counts = hist.values()
    return sum(counts)
...
...
def calc_praf(goldstd, predicted, universe):
    '''Score an extractor's token histogram against the gold standard.

    All three arguments are combined with the multiset operators ``&`` and
    ``-`` and must provide ``values()``; the callers in this session pass
    collections.Counter objects.

    goldstd   -- histogram of the tokens that should have been extracted
    predicted -- histogram of the tokens the extractor produced
    universe  -- histogram of every token in the source document

    Returns a dict with the keys 'p' (precision), 'r' (recall),
    'a' (accuracy) and 'f1'.  A ratio whose denominator is zero is reported
    as 0.0 instead of raising ZeroDivisionError.
    '''

    def _count(hist):
        # Total number of token occurrences in a histogram.
        return sum(hist.values())

    def _ratio(numerator, denominator):
        # An empty prediction, gold standard or universe gives a zero
        # denominator; score it as 0.0 rather than aborting the whole run.
        if not denominator:
            return 0.0
        return (numerator * 1.0) / denominator

    not_predicted = universe - predicted

    tp = _count(predicted & goldstd)
    fp = _count(predicted - goldstd)
    # The complement is taken relative to the universe, so only gold tokens
    # that also occur in the universe are counted as missed.
    fn = _count(not_predicted & goldstd)
    tn = _count(not_predicted & (universe - goldstd))

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    accuracy = _ratio(tp + tn, tp + fp + fn + tn)
    f1 = 2 * _ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    '''Return the immediate sub-directories of every directory given.

    directory -- an iterable of directory paths (not a single path string,
                 despite the singular name)

    Returns a list of joined paths, grouped in the order of *directory* and
    sorted by name within each parent.  os.listdir() yields entries in
    arbitrary order, and the driver code lines up gold and test folders
    from two separate listings, so the order has to be reproducible.
    '''
    return [os.path.join(parent, entry)
            for parent in directory
            for entry in sorted(os.listdir(parent))
            if os.path.isdir(os.path.join(parent, entry))]
...
...
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" histogram: every token of an HTML document.

    data_filepath -- path of the HTML file; it is parsed with lxml's HTML
                     parser using a fixed utf-8 encoding

    Returns a collections.Counter over the pieces obtained by splitting the
    document's text nodes on runs of non-word characters.

    A parse error is re-raised after the file content has been printed.
    '''
    with open(data_filepath, 'r') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # The parser may already have consumed part of the stream, so
            # rewind to dump the whole document, not just the unread tail.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    # Join the text nodes with a space: with an empty separator the last
    # word of one element and the first word of the next fuse into one token.
    content = " ".join(parsed_goldhtml.xpath('//text()'))

    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Build a token histogram from the plain-text file at *test_filepath*.

    The file is read whole and split on runs of non-word characters.  The
    returned collections.Counter maps each piece to its number of
    occurrences; a leading or trailing separator produces an empty-string
    piece, which is counted like any other token.
    '''
    absolute_path = os.path.abspath(test_filepath)
    with open(absolute_path, 'r') as handle:
        text = str(handle.read())

    pieces = re.split(r'\W+', text)
    return collections.Counter(pieces)
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Run *extract* on a test file and return a histogram of its tokens.

    extract       -- callable taking a file path and returning the
                     extracted text as a string
    data_filepath -- path of the extractor output to read

    The path is tried as given, then with '.txt' and finally with
    '.txt.txt' appended; the first candidate accepted by *extract* is used.
    If every candidate fails, *data_filepath* is printed and the last error
    is re-raised.

    Returns a collections.Counter over the text split on runs of non-word
    characters.
    '''
    candidates = (data_filepath,
                  data_filepath + '.txt',
                  data_filepath + '.txt.txt')
    last = len(candidates) - 1

    for position, candidate in enumerate(candidates):
        try:
            content = extract(candidate)
        except Exception:
            # Give up only after the final candidate.  The earlier version
            # fell through to a trailing print/raise even when a fallback
            # had succeeded, so the fallbacks could never be used.
            if position == last:
                print(data_filepath)
                raise
        else:
            break

    return collections.Counter(re.split(r'\W+', content))
...
...
def take_measurements(prep_data):
    '''Attach precision/recall/accuracy/F1 scores to every entry of *prep_data*.

    prep_data -- dict as built by prepare_for_measurements(); every value
                 must provide the keys 'testpath', 'universepath' and
                 'goldpath'

    Each value gains a 'measurements' key holding the dict returned by
    calc_praf().  The same, mutated *prep_data* object is returned.
    '''

    def read_text(path):
        # Close the handle deterministically; the former open(x).read()
        # lambda left every file to be closed by the garbage collector.
        with open(path) as handle:
            return str(handle.read())

    for val in prep_data.values():
        content = tokens_to_hist_extractor(read_text,
                                           os.path.abspath(val['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(val['universepath']))
        goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))

        val['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every document in *gold_directory* to the files needed to score it.

    gold_directory     -- folder laid out as <domain>/<year> holding the
                          gold-standard files and the source ("universe")
                          files
    test_directory     -- folder holding the extractor output; when omitted
                          the gold directory itself is used
    goldfile_ext       -- extension of the gold-standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the source documents
    pickle_output_name -- accepted for compatibility; currently unused

    Returns a dict keyed by document name (file name minus one known
    extension).  Each value is a dict with the keys 'domain', 'year',
    'goldpath', 'universepath' and 'testpath'.  Nothing is read here; the
    paths are only assembled.
    '''
    # Longest first, so an extension that is a suffix of another one cannot
    # shadow it.
    known_exts = sorted(set(ext for ext in (goldfile_ext, testfile_ext,
                                            universe_ext) if ext),
                        key=len, reverse=True)

    def strip_known_ext(name):
        # Remove one known extension from the END of the name only.  The
        # former unanchored regex also deleted '.txt' / '.html' from the
        # middle of a file name and escaped only the leading character.
        for ext in known_exts:
            if name.endswith(ext):
                return name[:-len(ext)]
        return name

    filenames = set(strip_known_ext(name)
                    for name in os.listdir(gold_directory))

    # Both branches of the old if/else were identical, so a missing
    # test_directory crashed inside os.path.join(None, ...).
    # NOTE(review): falling back to the gold directory is an assumption
    # about the intended default -- confirm against the callers.
    if not test_directory:
        test_directory = gold_directory

    # normpath drops a trailing separator, which would otherwise make
    # 'year' empty and shift the year into 'domain'.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    to_test_pkg = {}
    for name in filenames:
        to_test_pkg[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_directory, name + testfile_ext),
        }

    return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'


def domain_year(folder):
    '''Return the trailing (domain, year) path components of *folder*.'''
    parent, year = os.path.split(os.path.normpath(folder))
    return os.path.basename(parent), year


# Index the gold folders by (domain, year) so every test folder is scored
# against its own gold folder.  Pairing the two listings by position breaks
# as soon as one tree lists in a different order or lacks a folder.
goldset_folders = listsubdir(listsubdir([test_dir]))
gold_by_key = dict((domain_year(folder), folder) for folder in goldset_folders)
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:
    testset_folders = listsubdir(listsubdir([wcbe_test]))
    linkquotatest = []

    for test_folder in testset_folders:
        # A test folder without a gold counterpart raises KeyError here
        # instead of silently being scored against the wrong gold folder.
        gold_folder = gold_by_key[domain_year(test_folder)]
        prep_data = prepare_for_measurements(gold_folder, test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # One results file per extractor folder; the with-block makes sure the
    # pickle is flushed and the handle closed before the next iteration.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
Traceback (most recent call last):
File "<pyshell#12>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#11>", line 112, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#11>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#11>", line 112, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    '''Return the sum of all counts stored in the histogram *hist*.'''
    counts = hist.values()
    return sum(counts)
...
...
def calc_praf(goldstd, predicted, universe):
    '''Score an extractor's token histogram against the gold standard.

    All three arguments are combined with the multiset operators ``&`` and
    ``-`` and must provide ``values()``; the callers in this session pass
    collections.Counter objects.

    goldstd   -- histogram of the tokens that should have been extracted
    predicted -- histogram of the tokens the extractor produced
    universe  -- histogram of every token in the source document

    Returns a dict with the keys 'p' (precision), 'r' (recall),
    'a' (accuracy) and 'f1'.  A ratio whose denominator is zero is reported
    as 0.0 instead of raising ZeroDivisionError.
    '''

    def _count(hist):
        # Total number of token occurrences in a histogram.
        return sum(hist.values())

    def _ratio(numerator, denominator):
        # An empty prediction, gold standard or universe gives a zero
        # denominator; score it as 0.0 rather than aborting the whole run.
        if not denominator:
            return 0.0
        return (numerator * 1.0) / denominator

    not_predicted = universe - predicted

    tp = _count(predicted & goldstd)
    fp = _count(predicted - goldstd)
    # The complement is taken relative to the universe, so only gold tokens
    # that also occur in the universe are counted as missed.
    fn = _count(not_predicted & goldstd)
    tn = _count(not_predicted & (universe - goldstd))

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    accuracy = _ratio(tp + tn, tp + fp + fn + tn)
    f1 = 2 * _ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    '''Return the immediate sub-directories of every directory given.

    directory -- an iterable of directory paths (not a single path string,
                 despite the singular name)

    Returns a list of joined paths, grouped in the order of *directory* and
    sorted by name within each parent.  os.listdir() yields entries in
    arbitrary order, and the driver code lines up gold and test folders
    from two separate listings, so the order has to be reproducible.
    '''
    return [os.path.join(parent, entry)
            for parent in directory
            for entry in sorted(os.listdir(parent))
            if os.path.isdir(os.path.join(parent, entry))]
...
...
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" histogram: every token of an HTML document.

    data_filepath -- path of the HTML file; it is parsed with lxml's HTML
                     parser using a fixed utf-8 encoding

    Returns a collections.Counter over the pieces obtained by splitting the
    document's text nodes on runs of non-word characters.

    A parse error is re-raised after the file content has been printed.
    '''
    with open(data_filepath, 'r') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # The parser may already have consumed part of the stream, so
            # rewind to dump the whole document, not just the unread tail.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    # Join the text nodes with a space: with an empty separator the last
    # word of one element and the first word of the next fuse into one token.
    content = " ".join(parsed_goldhtml.xpath('//text()'))

    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Build a token histogram from the plain-text file at *test_filepath*.

    The file is read whole and split on runs of non-word characters.  The
    returned collections.Counter maps each piece to its number of
    occurrences; a leading or trailing separator produces an empty-string
    piece, which is counted like any other token.
    '''
    absolute_path = os.path.abspath(test_filepath)
    with open(absolute_path, 'r') as handle:
        text = str(handle.read())

    pieces = re.split(r'\W+', text)
    return collections.Counter(pieces)
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Run *extract* on a test file and return a histogram of its tokens.

    extract       -- callable taking a file path and returning the
                     extracted text as a string
    data_filepath -- path of the extractor output to read

    The path is tried as given, then with '.txt' and finally with
    '.txt.txt' appended; the first candidate accepted by *extract* is used.
    If every candidate fails, *data_filepath* plus '.txt' is printed and the
    last error is re-raised.

    Returns a collections.Counter over the text split on runs of non-word
    characters.
    '''
    candidates = (data_filepath,
                  data_filepath + '.txt',
                  data_filepath + '.txt.txt')
    last = len(candidates) - 1

    for position, candidate in enumerate(candidates):
        try:
            content = extract(candidate)
        except Exception:
            # Give up only after the final candidate.  The earlier version
            # fell through to a trailing print/raise even when a fallback
            # had succeeded, so the fallbacks could never be used.
            if position == last:
                print(data_filepath + '.txt')
                raise
        else:
            break

    return collections.Counter(re.split(r'\W+', content))
...
...
def take_measurements(prep_data):
    '''Attach precision/recall/accuracy/F1 scores to every entry of *prep_data*.

    prep_data -- dict as built by prepare_for_measurements(); every value
                 must provide the keys 'testpath', 'universepath' and
                 'goldpath'

    Each value gains a 'measurements' key holding the dict returned by
    calc_praf().  The same, mutated *prep_data* object is returned.
    '''

    def read_text(path):
        # Close the handle deterministically; the former open(x).read()
        # lambda left every file to be closed by the garbage collector.
        with open(path) as handle:
            return str(handle.read())

    for val in prep_data.values():
        content = tokens_to_hist_extractor(read_text,
                                           os.path.abspath(val['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(val['universepath']))
        goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))

        val['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every document in *gold_directory* to the files needed to score it.

    gold_directory     -- folder laid out as <domain>/<year> holding the
                          gold-standard files and the source ("universe")
                          files
    test_directory     -- folder holding the extractor output; when omitted
                          the gold directory itself is used
    goldfile_ext       -- extension of the gold-standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the source documents
    pickle_output_name -- accepted for compatibility; currently unused

    Returns a dict keyed by document name (file name minus one known
    extension).  Each value is a dict with the keys 'domain', 'year',
    'goldpath', 'universepath' and 'testpath'.  Nothing is read here; the
    paths are only assembled.
    '''
    # Longest first, so an extension that is a suffix of another one cannot
    # shadow it.
    known_exts = sorted(set(ext for ext in (goldfile_ext, testfile_ext,
                                            universe_ext) if ext),
                        key=len, reverse=True)

    def strip_known_ext(name):
        # Remove one known extension from the END of the name only.  The
        # former unanchored regex also deleted '.txt' / '.html' from the
        # middle of a file name and escaped only the leading character.
        for ext in known_exts:
            if name.endswith(ext):
                return name[:-len(ext)]
        return name

    filenames = set(strip_known_ext(name)
                    for name in os.listdir(gold_directory))

    # Both branches of the old if/else were identical, so a missing
    # test_directory crashed inside os.path.join(None, ...).
    # NOTE(review): falling back to the gold directory is an assumption
    # about the intended default -- confirm against the callers.
    if not test_directory:
        test_directory = gold_directory

    # normpath drops a trailing separator, which would otherwise make
    # 'year' empty and shift the year into 'domain'.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    to_test_pkg = {}
    for name in filenames:
        to_test_pkg[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_directory, name + testfile_ext),
        }

    return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'


def domain_year(folder):
    '''Return the trailing (domain, year) path components of *folder*.'''
    parent, year = os.path.split(os.path.normpath(folder))
    return os.path.basename(parent), year


# Index the gold folders by (domain, year) so every test folder is scored
# against its own gold folder.  Pairing the two listings by position breaks
# as soon as one tree lists in a different order or lacks a folder.
goldset_folders = listsubdir(listsubdir([test_dir]))
gold_by_key = dict((domain_year(folder), folder) for folder in goldset_folders)
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:
    testset_folders = listsubdir(listsubdir([wcbe_test]))
    linkquotatest = []

    for test_folder in testset_folders:
        # A test folder without a gold counterpart raises KeyError here
        # instead of silently being scored against the wrong gold folder.
        gold_folder = gold_by_key[domain_year(test_folder)]
        prep_data = prepare_for_measurements(gold_folder, test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # One results file per extractor folder; the with-block makes sure the
    # pickle is flushed and the handle closed before the next iteration.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt.txt
Traceback (most recent call last):
File "<pyshell#14>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#13>", line 112, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#13>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#13>", line 112, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    '''Return the sum of all counts stored in the histogram *hist*.'''
    counts = hist.values()
    return sum(counts)
...
...
def calc_praf(goldstd, predicted, universe):
    '''Score an extractor's token histogram against the gold standard.

    All three arguments are combined with the multiset operators ``&`` and
    ``-`` and must provide ``values()``; the callers in this session pass
    collections.Counter objects.

    goldstd   -- histogram of the tokens that should have been extracted
    predicted -- histogram of the tokens the extractor produced
    universe  -- histogram of every token in the source document

    Returns a dict with the keys 'p' (precision), 'r' (recall),
    'a' (accuracy) and 'f1'.  A ratio whose denominator is zero is reported
    as 0.0 instead of raising ZeroDivisionError.
    '''

    def _count(hist):
        # Total number of token occurrences in a histogram.
        return sum(hist.values())

    def _ratio(numerator, denominator):
        # An empty prediction, gold standard or universe gives a zero
        # denominator; score it as 0.0 rather than aborting the whole run.
        if not denominator:
            return 0.0
        return (numerator * 1.0) / denominator

    not_predicted = universe - predicted

    tp = _count(predicted & goldstd)
    fp = _count(predicted - goldstd)
    # The complement is taken relative to the universe, so only gold tokens
    # that also occur in the universe are counted as missed.
    fn = _count(not_predicted & goldstd)
    tn = _count(not_predicted & (universe - goldstd))

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    accuracy = _ratio(tp + tn, tp + fp + fn + tn)
    f1 = 2 * _ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    '''Return the immediate sub-directories of every directory given.

    directory -- an iterable of directory paths (not a single path string,
                 despite the singular name)

    Returns a list of joined paths, grouped in the order of *directory* and
    sorted by name within each parent.  os.listdir() yields entries in
    arbitrary order, and the driver code lines up gold and test folders
    from two separate listings, so the order has to be reproducible.
    '''
    return [os.path.join(parent, entry)
            for parent in directory
            for entry in sorted(os.listdir(parent))
            if os.path.isdir(os.path.join(parent, entry))]
...
...
def tokens_to_hist_from_universe(data_filepath):
    '''Build the "universe" histogram: every token of an HTML document.

    data_filepath -- path of the HTML file; it is parsed with lxml's HTML
                     parser using a fixed utf-8 encoding

    Returns a collections.Counter over the pieces obtained by splitting the
    document's text nodes on runs of non-word characters.

    A parse error is re-raised after the file content has been printed.
    '''
    with open(data_filepath, 'r') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # The parser may already have consumed part of the stream, so
            # rewind to dump the whole document, not just the unread tail.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    # Join the text nodes with a space: with an empty separator the last
    # word of one element and the first word of the next fuse into one token.
    content = " ".join(parsed_goldhtml.xpath('//text()'))

    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Build a token histogram from the plain-text file at *test_filepath*.

    The file is read whole and split on runs of non-word characters.  The
    returned collections.Counter maps each piece to its number of
    occurrences; a leading or trailing separator produces an empty-string
    piece, which is counted like any other token.
    '''
    absolute_path = os.path.abspath(test_filepath)
    with open(absolute_path, 'r') as handle:
        text = str(handle.read())

    pieces = re.split(r'\W+', text)
    return collections.Counter(pieces)
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Run *extract* on a test file and return a histogram of its tokens.

    extract       -- callable taking a file path and returning the
                     extracted text as a string
    data_filepath -- path of the extractor output to read

    *data_filepath* is printed on entry as a progress trace.  The path is
    then tried as given, with '.txt' and finally with '.txt.txt' appended;
    the first candidate accepted by *extract* is used.  If every candidate
    fails, *data_filepath* plus '.txt' is printed and the last error is
    re-raised.

    Returns a collections.Counter over the text split on runs of non-word
    characters.
    '''
    print(data_filepath)

    candidates = (data_filepath,
                  data_filepath + '.txt',
                  data_filepath + '.txt.txt')
    last = len(candidates) - 1

    for position, candidate in enumerate(candidates):
        try:
            content = extract(candidate)
        # "except Exception" rather than a bare except, so that Ctrl-C
        # interrupts the run instead of triggering the next fallback.
        except Exception:
            # Give up only after the final candidate.  The earlier version
            # fell through to a trailing print/raise even when a fallback
            # had succeeded, so the fallbacks could never be used.
            if position == last:
                print(data_filepath + '.txt')
                raise
        else:
            break

    return collections.Counter(re.split(r'\W+', content))
...
...
def take_measurements(prep_data):
    '''Attach precision/recall/accuracy/F1 scores to every entry of *prep_data*.

    prep_data -- dict as built by prepare_for_measurements(); every value
                 must provide the keys 'testpath', 'universepath' and
                 'goldpath'

    Each value gains a 'measurements' key holding the dict returned by
    calc_praf().  The same, mutated *prep_data* object is returned.
    '''

    def read_text(path):
        # Close the handle deterministically; the former open(x).read()
        # lambda left every file to be closed by the garbage collector.
        with open(path) as handle:
            return str(handle.read())

    for val in prep_data.values():
        content = tokens_to_hist_extractor(read_text,
                                           os.path.abspath(val['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(val['universepath']))
        goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))

        val['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every document in *gold_directory* to the files needed to score it.

    gold_directory     -- folder laid out as <domain>/<year> holding the
                          gold-standard files and the source ("universe")
                          files
    test_directory     -- folder holding the extractor output; when omitted
                          the gold directory itself is used
    goldfile_ext       -- extension of the gold-standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the source documents
    pickle_output_name -- accepted for compatibility; currently unused

    Returns a dict keyed by document name (file name minus one known
    extension).  Each value is a dict with the keys 'domain', 'year',
    'goldpath', 'universepath' and 'testpath'.  Nothing is read here; the
    paths are only assembled.
    '''
    # Longest first, so an extension that is a suffix of another one cannot
    # shadow it.
    known_exts = sorted(set(ext for ext in (goldfile_ext, testfile_ext,
                                            universe_ext) if ext),
                        key=len, reverse=True)

    def strip_known_ext(name):
        # Remove one known extension from the END of the name only.  The
        # former unanchored regex also deleted '.txt' / '.html' from the
        # middle of a file name and escaped only the leading character.
        for ext in known_exts:
            if name.endswith(ext):
                return name[:-len(ext)]
        return name

    filenames = set(strip_known_ext(name)
                    for name in os.listdir(gold_directory))

    # Both branches of the old if/else were identical, so a missing
    # test_directory crashed inside os.path.join(None, ...).
    # NOTE(review): falling back to the gold directory is an assumption
    # about the intended default -- confirm against the callers.
    if not test_directory:
        test_directory = gold_directory

    # normpath drops a trailing separator, which would otherwise make
    # 'year' empty and shift the year into 'domain'.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    to_test_pkg = {}
    for name in filenames:
        to_test_pkg[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_directory, name + testfile_ext),
        }

    return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'


def domain_year(folder):
    '''Return the trailing (domain, year) path components of *folder*.'''
    parent, year = os.path.split(os.path.normpath(folder))
    return os.path.basename(parent), year


# Index the gold folders by (domain, year) so every test folder is scored
# against its own gold folder.  Pairing the two listings by position breaks
# as soon as one tree lists in a different order or lacks a folder.
goldset_folders = listsubdir(listsubdir([test_dir]))
gold_by_key = dict((domain_year(folder), folder) for folder in goldset_folders)
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:
    testset_folders = listsubdir(listsubdir([wcbe_test]))
    linkquotatest = []

    for test_folder in testset_folders:
        # A test folder without a gold counterpart raises KeyError here
        # instead of silently being scored against the wrong gold folder.
        gold_folder = gold_by_key[domain_year(test_folder)]
        prep_data = prepare_for_measurements(gold_folder, test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # One results file per extractor folder; the with-block makes sure the
    # pickle is flushed and the handle closed before the next iteration.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt.txt
Traceback (most recent call last):
File "<pyshell#16>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#15>", line 112, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#15>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#15>", line 112, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
[About 1678 more lines. Double-click to unfold]
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    '''Return the sum of all values stored in the histogram mapping hist.'''
    counts = hist.values()
    return sum(counts)
...
...
def calc_praf(goldstd, predicted, universe):
    '''Compute precision, recall, accuracy and F1 from token histograms.

    The arguments are combined with the multiset operators &, - and +,
    so they are expected to be collections.Counter objects.

    goldstd   -- histogram of the gold-standard text
    predicted -- histogram of the extractor output
    universe  -- histogram of every token in the source document

    Returns a dict with the keys 'p', 'r', 'a' and 'f1'.  Any score whose
    denominator is empty (e.g. precision when nothing was predicted) is
    reported as 0 instead of raising ZeroDivisionError.
    '''

    def ratio(numerator, denominator):
        # Float division that maps an empty denominator to a score of 0.
        if not denominator:
            return 0
        return (numerator * 1.0) / denominator

    not_predicted = universe - predicted

    # NOTE(review): FN and TN are derived from `universe - predicted`, so a
    # gold token that never occurs in `universe` cannot count as missed.
    # Confirm this is the intended definition.
    TP = predicted & goldstd
    FP = predicted - goldstd
    FN = not_predicted & goldstd
    TN = not_predicted & (universe - goldstd)

    tp_total = histsum(TP)
    precision = ratio(tp_total, histsum(TP + FP))
    recall = ratio(tp_total, histsum(TP + FN))
    accuracy = ratio(histsum(TP + TN), histsum(TP + FP + FN + TN))
    f1 = ratio(2 * precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    '''Return the immediate sub-directories of each directory in directory.

    directory -- an iterable of directory paths (not a single path string)

    Every result is the parent path joined with the child name.  Parents
    are visited in the order given; the children of each parent are sorted
    by name because os.listdir() returns entries in arbitrary order, which
    would otherwise make the result order differ between calls or machines.
    '''
    subdirs = []
    for parent in directory:
        for entry in sorted(os.listdir(parent)):
            candidate = os.path.join(parent, entry)
            if os.path.isdir(candidate):
                subdirs.append(candidate)
    return subdirs
...
...
def tokens_to_hist_from_universe(data_filepath):
    '''Build the token histogram of all text nodes of an HTML file.

    data_filepath -- path of the HTML document to parse

    The file is parsed with lxml as UTF-8, every text node selected by
    //text() is concatenated, and the result is split on runs of non-word
    characters.  Returns a collections.Counter of the pieces.

    If parsing fails, the file content is printed and the error re-raised.
    '''
    # Binary mode: the parser is told the encoding and decodes the bytes
    # itself, so no text-mode newline/EOF translation should happen first.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # The parser may already have consumed the handle; rewind so
            # the dump shows the whole offending document.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))

    # NOTE(review): re.split yields '' when content starts or ends with a
    # non-word character, so '' is counted as a token.  Left unchanged
    # because the gold-standard and extractor tokenisers split the same way
    # and the three histograms must stay comparable.
    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))

    return tokenized_content
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Return the token histogram of the text file at test_filepath.

    The file is read in full and split on runs of non-word characters;
    the pieces (including any empty string produced at the edges) are
    counted in a collections.Counter.
    '''
    gold_path = os.path.abspath(test_filepath)
    with open(gold_path, 'r') as gold_file:
        text = str(gold_file.read())

    tokens = re.split(r'\W+', text)
    return collections.Counter(tokens)
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Return the token histogram of an extractor's output file.

    extract       -- callable taking a path and returning the text to count
    data_filepath -- path of the extractor output

    The path is printed, then extract() is tried on data_filepath,
    data_filepath + '.txt' and data_filepath + '.txt.txt' in that order;
    the first call that does not raise IOError supplies the content.  If
    all three fail, data_filepath + '.txt' is printed and the last IOError
    is re-raised.

    Returns a collections.Counter of the content split on runs of
    non-word characters.
    '''
    print(data_filepath)

    # TODO: an earlier, unfinished draft sketched one more fallback that
    # looked for the file name truncated to 50 characters; it was never
    # implemented.
    suffixes = ('', '.txt', '.txt.txt')
    for attempt, suffix in enumerate(suffixes):
        try:
            content = extract(data_filepath + suffix)
        except IOError:
            if attempt == len(suffixes) - 1:
                print(data_filepath + '.txt')
                raise
        else:
            break

    # NOTE(review): re.split yields '' when content starts or ends with a
    # non-word character; kept as is so this histogram stays comparable
    # with the gold-standard and universe histograms, which split the same
    # way.
    tokenized_content = collections.Counter()
    tokenized_content.update(re.split(r'\W+', content))

    return tokenized_content
...
...
def take_measurements(prep_data):
    '''Score every entry of prep_data and store the result on the entry.

    prep_data -- dict whose values are dicts with the keys 'testpath',
                 'universepath' and 'goldpath'

    For each value the extractor output, the universe document and the
    gold standard are turned into token histograms and passed to
    calc_praf; its result is stored under the key 'measurements'.

    The entries are modified in place and the same prep_data object is
    returned.
    '''

    def read_text(path):
        # Read through a context manager so the handle is closed right
        # away instead of being left to the garbage collector.
        with open(path) as handle:
            return str(handle.read())

    # Only the values are needed; .values() also works on Python 3, where
    # dict.iteritems() no longer exists.
    for val in prep_data.values():
        content = tokens_to_hist_extractor(
            read_text, os.path.abspath(val['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(val['universepath']))
        goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))

        val['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every document name in gold_directory to the paths used to score it.

    gold_directory     -- folder holding the gold-standard and universe
                          files; its last two path components are
                          reported as 'domain' and 'year'
    test_directory     -- folder holding the extractor output; when it is
                          not given, gold_directory is used
    goldfile_ext       -- extension of the gold-standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the universe (source) files
    pickle_output_name -- unused; kept so existing calls keep working

    Document names are the entries of gold_directory with every occurrence
    of the three extensions removed.

    Returns a dict {name: {'domain', 'year', 'goldpath', 'universepath',
    'testpath'}}.  Nothing is read or measured here.
    '''
    if not test_directory:
        # Previously this case ended in os.path.join(None, ...), which
        # raises; fall back to looking for test files next to the gold files.
        test_directory = gold_directory

    # Escape the whole extension, not just its first character, so that any
    # regex metacharacter in it is matched literally.  Empty extensions are
    # skipped because an empty alternative would shadow the ones after it.
    extensions = [ext for ext in (goldfile_ext, testfile_ext, universe_ext)
                  if ext]
    ext_pattern = re.compile("|".join(re.escape(ext) for ext in extensions))
    filenames = {ext_pattern.sub("", name)
                 for name in os.listdir(gold_directory)}

    # normpath drops a trailing separator, which would otherwise make the
    # year come out as an empty string.
    domain_dir, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(domain_dir)[1]

    to_test_pkg = {
        name: {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_directory, name + testfile_ext),
        }
        for name in filenames
    }

    return to_test_pkg
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
# Root of the gold-standard dataset.
# NOTE(review): the two nested listsubdir() levels below are presumably
# <domain>/<year>; confirm against the dataset layout.
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'

goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

# Index the gold folders by their last two path components so that each
# test folder is matched by name.  Matching by list position depends on the
# arbitrary order of os.listdir() and fails with IndexError when the two
# trees do not contain the same number of folders.
gold_by_key = {}
for gold_folder in goldset_folders:
    parent, leaf = os.path.split(gold_folder)
    gold_by_key[(os.path.basename(parent), leaf)] = gold_folder

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    for test_folder in testset_folders:
        parent, leaf = os.path.split(test_folder)
        gold_folder = gold_by_key.get((os.path.basename(parent), leaf))
        if gold_folder is None:
            print('no gold-standard folder for ' + test_folder)
            continue

        prep_data = prepare_for_measurements(gold_folder, test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Write through a context manager so the pickle is flushed and closed
    # before the next extractor is processed.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
Traceback (most recent call last):
File "<pyshell#18>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#17>", line 112, in take_measurements
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
File "<pyshell#17>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath)
File "<pyshell#17>", line 112, in <lambda>
content = tokens_to_hist_extractor(lambda x: str(open(x).read()), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
[About 1677 more lines. Double-click to unfold]
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    """Return the sum of all values stored in the histogram ``hist``.

    ``hist`` is any mapping with numeric values (for example a
    ``collections.Counter``). An empty mapping sums to 0.
    """
    total = 0
    for count in hist.values():
        total += count
    return total
...
...
def calc_praf(goldstd, predicted, universe):
    """Compute precision, recall, accuracy and F1 from token histograms.

    All three arguments are ``collections.Counter`` multisets of tokens:

    goldstd   -- tokens of the gold-standard content
    predicted -- tokens produced by the extractor under test
    universe  -- every token of the source document

    Returns a dict with the keys ``'p'`` (precision), ``'r'`` (recall),
    ``'a'`` (accuracy) and ``'f1'``.

    Any ratio whose denominator is zero (e.g. precision when nothing was
    predicted) is reported as 0.0 instead of raising ZeroDivisionError,
    matching the way F1 was already handled.
    """
    def _total(hist):
        # Total number of tokens held in a histogram.
        return sum(hist.values())

    def _ratio(numerator, denominator):
        # Guarded float division: an empty denominator scores 0.0.
        if not denominator:
            return 0.0
        return (numerator * 1.0) / denominator

    # Counter '&' keeps the minimum count and '-' drops non-positive
    # counts, so every intermediate histogram holds positive counts only
    # and the totals below can simply be added together.
    not_predicted = universe - predicted

    true_pos = _total(predicted & goldstd)
    false_pos = _total(predicted - goldstd)
    false_neg = _total(not_predicted & goldstd)
    true_neg = _total(not_predicted & (universe - goldstd))

    precision = _ratio(true_pos, true_pos + false_pos)
    recall = _ratio(true_pos, true_pos + false_neg)
    accuracy = _ratio(true_pos + true_neg,
                      true_pos + false_pos + false_neg + true_neg)
    f1 = 2 * _ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    """Return the paths of the immediate sub-directories of each directory.

    ``directory`` is an iterable of directory paths. For every entry
    returned by ``os.listdir`` that is itself a directory, the joined
    path ``parent/entry`` is collected. Plain files are skipped. The
    order follows the input order and whatever ``os.listdir`` yields.
    """
    subdirs = []
    for parent in directory:
        for entry in os.listdir(parent):
            candidate = os.path.join(parent, entry)
            if os.path.isdir(candidate):
                subdirs.append(candidate)
    return subdirs
...
...
def tokens_to_hist_from_universe(data_filepath):
    """Build the token histogram of all text found in an HTML document.

    The file at ``data_filepath`` is parsed with ``lxml.html`` using a
    UTF-8 ``HTMLParser``. The text of every ``//text()`` node is
    concatenated and split on runs of non-word characters.

    Returns a ``collections.Counter`` mapping token -> occurrence count.
    The split can yield an empty-string token at the edges of the text;
    it is counted like any other token.

    If parsing fails, the raw file content is printed for diagnosis and
    the original exception is re-raised.
    """
    with open(data_filepath, 'r') as data_file:
        try:
            parsed_html = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # The parser may already have advanced the file position;
            # rewind so the diagnostic dump shows the whole document.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    content = "".join(parsed_html.xpath('//text()'))
    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    """Build the token histogram of a gold-standard text file.

    The file is read whole and split on runs of non-word characters.
    Returns a ``collections.Counter`` mapping token -> occurrence count.
    An empty-string token produced at the edges of the text is counted
    like any other token.
    """
    absolute_path = os.path.abspath(test_filepath)
    with open(absolute_path, 'r') as gold_file:
        text = str(gold_file.read())
    return collections.Counter(re.split(r'\W+', text))
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    """Build the token histogram of an extractor's output file.

    extract       -- callable taking a path and returning an object with
                     a ``read()`` method (e.g. the builtin ``open``)
    data_filepath -- path of the extractor output

    The path is tried as given, then with ``'.txt'`` appended, then with
    ``'.txt.txt'`` appended; the first one that can be read wins.
    The returned handle is closed after reading when it has ``close()``.

    Returns a ``collections.Counter`` mapping token -> occurrence count
    (tokens are the pieces between runs of non-word characters).

    Raises IOError (the error of the last attempt) when none of the
    candidate paths can be read.
    """
    print(data_filepath)

    # Fallback order is significant: exact name first, then the variants
    # carrying one or two extra '.txt' suffixes.
    candidates = (data_filepath,
                  data_filepath + '.txt',
                  data_filepath + '.txt.txt')

    content = None
    for candidate in candidates:
        try:
            handle = extract(candidate)
            try:
                content = handle.read()
            finally:
                # Release the handle even if read() fails; tolerate
                # extract() results that have no close().
                closer = getattr(handle, 'close', None)
                if closer is not None:
                    closer()
        except IOError:
            if candidate == candidates[-1]:
                # Every variant failed: report and propagate.
                print(data_filepath + '.txt')
                raise
        else:
            break

    return collections.Counter(re.split(r'\W+', content))
...
...
def take_measurements(prep_data):
    """Score every entry of ``prep_data`` and store the result in place.

    ``prep_data`` maps a file name to a dict holding at least the keys
    ``'testpath'``, ``'universepath'`` and ``'goldpath'``. For each
    entry the three token histograms are built and the dict returned by
    ``calc_praf`` is stored under the key ``'measurements'``.

    Returns the same (mutated) ``prep_data`` mapping.
    """
    # .values() works on both Python 2 and 3; the keys are not needed.
    for entry in prep_data.values():
        predicted = tokens_to_hist_extractor(
            open, os.path.abspath(entry['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(entry['universepath']))
        goldstd = tokens_to_hist_goldstd(
            os.path.abspath(entry['goldpath']))

        entry['measurements'] = calc_praf(goldstd, predicted, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    """Map each base file name in ``gold_directory`` to the paths to score.

    gold_directory     -- directory holding the gold-standard files and
                          the "universe" (source HTML) files
    test_directory     -- directory holding the extractor output; when
                          omitted, ``gold_directory`` is used
    goldfile_ext       -- extension of the gold-standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the universe files
    pickle_output_name -- accepted for interface compatibility; unused

    Returns a dict keyed by base name. Each value is a dict with
    ``'domain'`` and ``'year'`` (the last two components of
    ``gold_directory``) plus ``'goldpath'``, ``'universepath'`` and
    ``'testpath'``. No file is opened here; only paths are built.
    """
    # re.escape keeps the extensions literal even when they do not start
    # with a dot. NOTE(review): the pattern is deliberately un-anchored,
    # as before, so an extension is removed wherever it occurs in a name.
    ext_pattern = "|".join(
        re.escape(ext) for ext in (goldfile_ext, testfile_ext, universe_ext))
    filenames = set(re.sub(ext_pattern, "", name)
                    for name in os.listdir(gold_directory))

    parent_path, year = os.path.split(gold_directory)
    domain = os.path.split(parent_path)[1]

    # Without an explicit test directory, look for the test files next
    # to the gold files instead of joining onto None.
    test_root = test_directory if test_directory else gold_directory

    to_test_pkg = {}
    for name in filenames:
        to_test_pkg[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_root, name + testfile_ext),
        }

    return to_test_pkg
# Driver: score every extractor under wcbe_path against the gold dataset
# and pickle one result list per extractor directory.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'

# Two levels below each root: <root>/<domain>/<year>.
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    # NOTE(review): gold and test folders are paired purely by position,
    # which assumes both trees list their <domain>/<year> folders in the
    # same order -- confirm before relying on the results.
    for i, testset_folder in enumerate(testset_folders):
        prep_data = prepare_for_measurements(
            goldset_folders[i], testset_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Close the results file deterministically so the pickle is flushed.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
Traceback (most recent call last):
File "<pyshell#20>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#19>", line 112, in take_measurements
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
File "<pyshell#19>", line 78, in tokens_to_hist_extractor
content = extract(data_filepath).read()
File "<pyshell#19>", line 112, in <lambda>
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
[About 1677 more lines. Double-click to unfold]
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    '''Return the total of all counts stored in the histogram ``hist``.

    ``hist`` is any mapping whose values are numbers (the callers in this
    script pass ``collections.Counter`` objects).
    '''
    total = 0
    for count in hist.values():
        total += count
    return total
...
...
def calc_praf(goldstd, predicted, universe):
    '''Compute precision, recall, accuracy and F1 for one extraction result.

    All three arguments are token histograms that support the multiset
    operators ``&`` and ``-`` (``collections.Counter`` in this script):

    goldstd   -- tokens of the hand-made gold standard
    predicted -- tokens produced by the extractor under test
    universe  -- tokens of the complete source document

    Returns a dict with the keys ``'p'`` (precision), ``'r'`` (recall),
    ``'a'`` (accuracy) and ``'f1'``.  Any ratio whose denominator is zero
    is reported as 0 instead of raising ZeroDivisionError, which is the
    policy the original code already applied to F1 only.
    '''
    def total(hist):
        return sum(hist.values())

    def ratio(numerator, denominator):
        # float() keeps true division under Python 2 as well.
        return float(numerator) / denominator if denominator else 0

    true_pos = predicted & goldstd
    false_pos = predicted - goldstd
    # Counter subtraction is not a set complement: (universe - predicted)
    # & goldstd over-counts misses as soon as a token occurs more often in
    # the universe than in the gold standard.  Gold tokens (limited to the
    # universe) that the extractor failed to return are the real misses.
    false_neg = (goldstd & universe) - predicted
    true_neg = (universe - predicted) & (universe - goldstd)

    tp = total(true_pos)
    fp = total(false_pos)
    fn = total(false_neg)
    tn = total(true_neg)

    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    accuracy = ratio(tp + tn, tp + fp + fn + tn)
    f1 = 2 * ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    '''Return the paths of all immediate sub-directories of several folders.

    directory -- an iterable of directory paths (not a single path string)

    The result keeps the order of ``directory``; within one parent the
    sub-directories are sorted by name.  ``os.listdir`` makes no ordering
    promise, and the driver script pairs gold and test folders by their
    position in these lists, so the order has to be deterministic.
    '''
    subdirs = []
    for parent in directory:
        for entry in sorted(os.listdir(parent)):
            candidate = os.path.join(parent, entry)
            if os.path.isdir(candidate):
                subdirs.append(candidate)
    return subdirs
...
...
def tokens_to_hist_from_universe(data_filepath):
    '''Return the token histogram of all text nodes of an HTML file.

    The file at ``data_filepath`` is parsed with lxml using a UTF-8 HTML
    parser, every text node is concatenated and the result is split on
    runs of non-word characters.  If parsing fails, whatever is left to
    read from the file is printed before the error is re-raised.

    Returns a ``collections.Counter`` mapping token -> occurrence count.
    '''
    with open(data_filepath, 'r') as data_file:
        try:
            html_parser = lxml.html.HTMLParser(encoding="utf-8")
            parsed_goldhtml = lxml.html.parse(data_file, html_parser)
        except:
            # Dump the unread remainder to help locate the offending input.
            print(str(data_file.read()))
            raise

    text_nodes = parsed_goldhtml.xpath('//text()')
    tokens = re.split(r'\W+', "".join(text_nodes))
    return collections.Counter(tokens)
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    '''Return the token histogram of a gold-standard text file.

    The whole file is read, split on runs of non-word characters and the
    resulting tokens are counted.  A leading or trailing separator yields
    an empty-string token, which is counted like any other token.

    Returns a ``collections.Counter`` mapping token -> occurrence count.
    '''
    absolute_path = os.path.abspath(test_filepath)
    with open(absolute_path, 'r') as gold_file:
        raw_text = str(gold_file.read())
    return collections.Counter(re.split(r'\W+', raw_text))
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    '''Return the token histogram of an extractor's output.

    extract       -- callable taking a file path and returning an object
                     with a ``read()`` method (for example ``open``)
    data_filepath -- path of the extractor output; when it does not exist
                     the same path with ``'.txt'`` appended is used

    The path is printed before processing.  On IOError the path that was
    actually tried is printed again and the error is re-raised.

    Returns a ``collections.Counter`` mapping token -> occurrence count.
    '''
    print(data_filepath)

    if not os.path.exists(data_filepath):
        data_filepath += '.txt'

    try:
        handle = extract(data_filepath)
        try:
            content = handle.read()
        finally:
            # The caller hands over an opener, so nobody else can close
            # the handle; leaving it open leaks one descriptor per file.
            close = getattr(handle, 'close', None)
            if close is not None:
                close()
    except IOError:
        print(data_filepath)
        raise

    return collections.Counter(re.split(r'\W+', content))
...
...
def take_measurements(prep_data):
    '''Add precision/recall/accuracy/F1 measurements to every entry.

    prep_data -- dict as built by ``prepare_for_measurements``; every value
                 is a dict with the keys ``'testpath'``, ``'universepath'``
                 and ``'goldpath'``

    Each value gains a ``'measurements'`` key holding the result of
    ``calc_praf``.  The dict is modified in place and also returned.
    '''
    # Only the values are needed; values() also works on Python 2 and 3,
    # unlike the Python-2-only iteritems().
    for entry in prep_data.values():
        content = tokens_to_hist_extractor(open, os.path.abspath(entry['testpath']))
        universe = tokens_to_hist_from_universe(os.path.abspath(entry['universepath']))
        goldstd = tokens_to_hist_goldstd(os.path.abspath(entry['goldpath']))

        entry['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every document name in a gold-standard folder to its file paths.

    gold_directory     -- folder named ``<domain>/<year>`` that holds the
                          gold standard and universe files
    test_directory     -- folder holding the extractor output; defaults to
                          ``gold_directory`` when omitted
    goldfile_ext       -- extension of the gold standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the universe (full document) files
    pickle_output_name -- accepted for compatibility; not used here

    Returns a dict ``{name: info}`` where ``name`` is a file name of
    ``gold_directory`` with its extension removed and ``info`` has the keys
    ``'domain'``, ``'year'``, ``'goldpath'``, ``'universepath'`` and
    ``'testpath'``.  The histograms themselves are computed later by
    ``take_measurements``.
    '''
    # The original else-branch joined paths onto None and crashed; without
    # an explicit test folder the test files are looked up next to the gold
    # files.
    if not test_directory:
        test_directory = gold_directory

    # Strip a known extension only at the end of the name, and escape the
    # whole extension, so names that merely contain ".txt" or ".html" stay
    # intact.
    extensions = (goldfile_ext, testfile_ext, universe_ext)
    suffix_pattern = re.compile(
        '(?:' + '|'.join(re.escape(ext) for ext in extensions) + ')$')
    filenames = set(suffix_pattern.sub('', name)
                    for name in os.listdir(gold_directory))

    # normpath drops a trailing separator that would leave 'year' empty.
    parent, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.split(parent)[1]

    to_test_pkg = {name:
                   {
                       'domain': domain,
                       'year': year,
                       'goldpath': os.path.join(gold_directory, name + goldfile_ext),
                       'universepath': os.path.join(gold_directory, name + universe_ext),
                       'testpath': os.path.join(test_directory, name + testfile_ext)
                   } for name in filenames}

    return to_test_pkg
# Driver: measure every extractor found under wcbe_path against the gold
# standard dataset and pickle the results next to the extractor output.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'

# Two levels of sub-directories below each root are collected.
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    # Gold and test folders are paired by position.  Fail before doing any
    # work when there are more test folders than gold folders (this used to
    # surface as an IndexError part-way through the run).
    # NOTE(review): pairing by position assumes both trees list the same
    # folders in the same order -- confirm against the dataset layout.
    if len(testset_folders) > len(goldset_folders):
        raise IndexError('more test folders than gold folders in %s' % wcbe_test)

    linkquotatest = []

    for gold_folder, test_folder in zip(goldset_folders, testset_folders):

        prep_data = prepare_for_measurements(gold_folder, test_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Close the results file deterministically so the pickle is flushed.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
Traceback (most recent call last):
File "<pyshell#22>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#21>", line 107, in take_measurements
universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
File "<pyshell#21>", line 42, in tokens_to_hist_from_universe
with open(data_filepath,'r') as data_file:
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\dataset\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.html'
[About 1672 more lines. Double-click to unfold]
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
Traceback (most recent call last):
File "<pyshell#23>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#21>", line 109, in take_measurements
goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))
File "<pyshell#21>", line 62, in tokens_to_hist_goldstd
with open(os.path.abspath(test_filepath), 'r') as f:
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\dataset\\www.foxnews.com\\2010\\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt'
[About 1672 more lines. Double-click to unfold]
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    """Return the sum of all counts stored in the histogram *hist*.

    *hist* is any mapping whose values are numbers (the callers in this
    session pass ``collections.Counter`` objects).
    """
    return sum(hist.values())


def calc_praf(goldstd, predicted, universe):
    """Compute precision, recall, accuracy and F1 over token histograms.

    The three arguments are combined with the ``&``, ``-`` and ``+``
    multiset operators, so they are expected to be ``collections.Counter``
    objects:

    goldstd   -- histogram of the tokens that should have been extracted
    predicted -- histogram of the tokens the extractor produced
    universe  -- histogram of every token that could have been produced

    Returns a dict with the float values ``'p'`` (precision), ``'r'``
    (recall), ``'a'`` (accuracy) and ``'f1'``.  Any ratio whose denominator
    is zero (empty prediction, empty gold standard, empty universe) is
    reported as 0.0 instead of raising ZeroDivisionError.
    """

    def ratio(numerator_hist, denominator_hist):
        # Guarded division of two histogram totals; ``* 1.0`` forces float
        # division under Python 2.
        denominator = histsum(denominator_hist)
        if not denominator:
            return 0.0
        return (histsum(numerator_hist) * 1.0) / denominator

    not_predicted = universe - predicted

    true_pos = predicted & goldstd
    false_pos = predicted - goldstd
    false_neg = not_predicted & goldstd
    true_neg = not_predicted & (universe - goldstd)

    precision = ratio(true_pos, true_pos + false_pos)
    recall = ratio(true_pos, true_pos + false_neg)
    accuracy = ratio(true_pos + true_neg,
                     true_pos + false_pos + false_neg + true_neg)

    if precision + recall:
        f1 = 2 * ((precision * recall) / (precision + recall))
    else:
        f1 = 0.0

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    """Return the paths of the immediate sub-directories of each directory.

    directory -- an iterable of directory paths; a single path string is
                 also accepted and treated as a one-element list.

    The result is a flat list of ``os.path.join(parent, entry)`` paths,
    grouped in the order the parents were given.  Entries of each parent are
    sorted by name, because ``os.listdir`` returns them in arbitrary order
    and callers that index two listings side by side need a stable order.
    """
    # ``type(u'')`` covers Python 2 ``unicode`` as well as ``str``.
    if isinstance(directory, (str, type(u''))):
        directory = [directory]

    subdirs = []
    for parent in directory:
        for entry in sorted(os.listdir(parent)):
            path = os.path.join(parent, entry)
            if os.path.isdir(path):
                subdirs.append(path)
    return subdirs
...
...
def _count_tokens(content):
    r"""Return a ``collections.Counter`` of the word tokens in *content*.

    A token is a maximal run of ``\w`` characters.  Using ``findall`` rather
    than splitting on ``\W+`` keeps the empty strings that a split yields at
    the start/end of the text out of the histogram.
    """
    return collections.Counter(re.findall(r'\w+', content))


def tokens_to_hist_from_universe(data_filepath):
    """Return the token histogram of all text nodes of an HTML file.

    data_filepath -- path of the HTML file; if it does not exist, the same
                     path with ``'.txt'`` appended is opened instead.

    The file is parsed with ``lxml.html`` (UTF-8 forced) and every node
    matched by the XPath ``//text()`` contributes to the histogram.  If
    parsing fails, the file content is printed and the exception re-raised.
    """
    if not os.path.exists(data_filepath):
        data_filepath += '.txt'

    # Binary mode: the parser is given the encoding explicitly, and Python 2
    # text mode on Windows would translate line endings and stop at Ctrl-Z.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # Dump the whole file for debugging, then propagate the error.
            data_file.seek(0)
            print(str(data_file.read()))
            raise

    # Join with a space so the last word of one text node is not fused with
    # the first word of the next one (e.g. across adjacent elements).
    content = " ".join(parsed_goldhtml.xpath('//text()'))
    return _count_tokens(content)


def tokens_to_hist_goldstd(test_filepath):
    """Return the token histogram of a plain-text gold-standard file.

    test_filepath -- path of the text file; if it does not exist, the same
                     path with ``'.txt'`` appended is opened instead.

    Raises IOError if neither path can be opened.
    """
    if not os.path.exists(test_filepath):
        test_filepath += '.txt'

    with open(os.path.abspath(test_filepath), 'r') as gold_file:
        content = gold_file.read()

    return _count_tokens(content)


def tokens_to_hist_extractor(extract, data_filepath):
    """Return the token histogram of an extractor's output for one file.

    extract       -- callable taking a file path and returning an object
                     with a ``read()`` method (the session passes ``open``)
    data_filepath -- path handed to *extract*; if it does not exist, the
                     same path with ``'.txt'`` appended is used instead.

    The path is printed before processing.  On IOError the path actually
    used is printed again and the error re-raised.  The object returned by
    *extract* is closed after reading when it offers ``close()``.
    """
    print(data_filepath)

    if not os.path.exists(data_filepath):
        data_filepath += '.txt'

    try:
        extracted = extract(data_filepath)
        try:
            content = extracted.read()
        finally:
            close = getattr(extracted, 'close', None)
            if close is not None:
                close()
    except IOError:
        print(data_filepath)
        raise

    return _count_tokens(content)
...
...
def take_measurements(prep_data):
    """Attach precision/recall/accuracy/F1 measurements to every entry.

    prep_data -- dict mapping a file name to a dict that holds at least the
                 keys ``'testpath'``, ``'universepath'`` and ``'goldpath'``
                 (the structure built by ``prepare_for_measurements``).

    For each entry the three files are turned into token histograms and the
    result of ``calc_praf`` is stored under the new key ``'measurements'``.
    The entries are modified in place and the same *prep_data* object is
    returned.  Errors raised while reading a file propagate to the caller.
    """
    # Only the values are needed; ``values()`` works on Python 2 and 3.
    for entry in prep_data.values():
        content = tokens_to_hist_extractor(
            open, os.path.abspath(entry['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(entry['universepath']))
        goldstd = tokens_to_hist_goldstd(
            os.path.abspath(entry['goldpath']))

        entry['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map every base file name in *gold_directory* to the paths to measure.

    gold_directory     -- directory with the gold-standard and universe
                          files; its last two path components are recorded
                          as ``'domain'`` and ``'year'``
    test_directory     -- directory with the extractor output; when omitted
                          (or empty) the gold directory is used
    goldfile_ext       -- extension of the gold-standard files
    testfile_ext       -- extension of the extractor output files
    universe_ext       -- extension of the "universe" files
    pickle_output_name -- accepted for compatibility; not used

    Base names are obtained by removing one of the three extensions from the
    END of each directory entry.  Returns a dict
    ``{name: {'domain', 'year', 'goldpath', 'universepath', 'testpath'}}``.
    No file is opened here; only paths are built.
    '''
    # Longest extension first so that e.g. '.html' wins over '.htm'.
    known_exts = sorted(
        set(ext for ext in (goldfile_ext, testfile_ext, universe_ext) if ext),
        key=len, reverse=True)

    def strip_known_ext(filename):
        # Remove a known extension only when it is a real suffix.
        for ext in known_exts:
            if filename.endswith(ext):
                return filename[:-len(ext)]
        return filename

    filenames = set(strip_known_ext(name)
                    for name in os.listdir(gold_directory))

    # Without a separate test directory the test files are looked up next to
    # the gold files instead of joining onto None.
    if not test_directory:
        test_directory = gold_directory

    # normpath drops a trailing separator, which would otherwise leave the
    # year empty and shift the domain by one component.
    domain_dir, year = os.path.split(os.path.normpath(gold_directory))
    domain = os.path.basename(domain_dir)

    to_test_pkg = {}
    for name in filenames:
        to_test_pkg[name] = {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_directory, name + testfile_ext),
        }

    return to_test_pkg
[About 161 more lines. Double-click to unfold]
# Driver: measure every extractor result set under ``wcbe_path`` against the
# gold-standard dataset under ``test_dir`` and pickle one result list per
# extractor into ``<extractor folder>/results.pkl``.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'

# Both trees are two levels deep below their root: <domain>/<year>.
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

# Index the gold folders by their trailing (domain, year) components so each
# test folder is paired with its counterpart by name rather than by its
# position in a directory listing.
gold_by_key = {}
for gold_folder in goldset_folders:
    domain_dir, year = os.path.split(gold_folder)
    gold_by_key[(os.path.basename(domain_dir), year)] = gold_folder

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    for test_folder in testset_folders:
        domain_dir, year = os.path.split(test_folder)
        key = (os.path.basename(domain_dir), year)
        if key not in gold_by_key:
            # Fail loudly: a partial result file could pass for a full one.
            raise ValueError(
                'no gold-standard folder matches ' + test_folder)

        prep_data = prepare_for_measurements(gold_by_key[key], test_folder,
                                             testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # One pickle per extractor folder; ``with`` closes the file reliably.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
Traceback (most recent call last):
File "<pyshell#25>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#24>", line 111, in take_measurements
universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
File "<pyshell#24>", line 43, in tokens_to_hist_from_universe
with open(data_filepath,'r') as data_file:
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\dataset\\www.foxnews.com\\2010\\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.html.txt'
[About 1693 more lines. Double-click to unfold]
>>> import pickle
... import re
... import os
... import collections
... import lxml.html
...
...
def histsum(hist):
    """Return the total token count of ``hist`` (the sum of all its values)."""
    return sum(hist.values())


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 if it is undefined."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def calc_praf(goldstd, predicted, universe):
    """Compute precision, recall, accuracy and F1 from token histograms.

    Args:
        goldstd: ``collections.Counter`` of the gold-standard tokens.
        predicted: ``collections.Counter`` of the extractor's tokens.
        universe: ``collections.Counter`` of every token that could have
            been selected.

    Returns:
        A dict with keys ``'p'`` (precision), ``'r'`` (recall),
        ``'a'`` (accuracy) and ``'f1'``.  Any ratio whose denominator is
        zero (e.g. an empty prediction) is reported as 0 instead of raising
        ``ZeroDivisionError``, matching how F1 was already handled.
    """
    # Counter '&' keeps the minimum count per token, '-' drops counts <= 0.
    true_pos = predicted & goldstd
    false_pos = predicted - goldstd
    not_predicted = universe - predicted
    # NOTE(review): with counts above 1 this can exceed ``goldstd - predicted``
    # (e.g. universe=10, predicted=2, goldstd=3 gives 3, not 1), so the four
    # cells need not add up to the universe.  Left unchanged so previously
    # recorded results stay comparable -- confirm the intended definition.
    false_neg = not_predicted & goldstd
    true_neg = not_predicted & (universe - goldstd)

    tp = histsum(true_pos)
    fp = histsum(false_pos)
    fn = histsum(false_neg)
    tn = histsum(true_neg)

    precision = _safe_ratio(tp, tp + fp)
    recall = _safe_ratio(tp, tp + fn)
    accuracy = _safe_ratio(tp + tn, tp + fp + fn + tn)
    f1 = 2 * _safe_ratio(precision * recall, precision + recall)

    return {'p': precision, 'r': recall, 'a': accuracy, 'f1': f1}
...
...
def listsubdir(directory):
    """Return the immediate subdirectories of every directory in ``directory``.

    Args:
        directory: an iterable of directory paths (not a single path string).

    Returns:
        A list of ``os.path.join(parent, entry)`` paths for each entry that
        is itself a directory.  Parents are visited in the order given and
        entries within each parent are sorted by name, so the result does not
        depend on the arbitrary order ``os.listdir`` returns.
    """
    subdirs = []
    for parent in directory:
        for entry in sorted(os.listdir(parent)):
            path = os.path.join(parent, entry)
            if os.path.isdir(path):
                subdirs.append(path)
    return subdirs
...
...
def tokens_to_hist_from_universe(data_filepath):
    """Build the token histogram of all text in an HTML file.

    The file is parsed with ``lxml.html`` as UTF-8, every text node
    (``//text()``) is concatenated, and the result is split on runs of
    non-word characters.

    Args:
        data_filepath: path of the HTML file to parse.

    Returns:
        A ``collections.Counter`` mapping token -> occurrence count.  As with
        the other tokenizers in this session, ``re.split`` may yield an
        empty-string token when the text starts or ends with a non-word
        character; it is counted like any other token.

    Raises:
        Whatever ``open`` or ``lxml.html.parse`` raise.  On a parse failure
        the unread remainder of the file is printed before re-raising.
    """
    # Binary mode: the parser is told the encoding explicitly, so it should
    # see the raw bytes rather than platform text-mode translated data.
    with open(data_filepath, 'rb') as data_file:
        try:
            parsed_goldhtml = lxml.html.parse(
                data_file, lxml.html.HTMLParser(encoding="utf-8"))
        except Exception:
            # Diagnostic aid only; the error still propagates to the caller.
            print(str(data_file.read()))
            raise

    content = "".join(parsed_goldhtml.xpath('//text()'))
    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_goldstd(test_filepath):
    """Build the token histogram of a gold-standard text file.

    Args:
        test_filepath: path of the gold-standard file.  If that path does
            not exist, ``'.txt'`` is appended and the resulting path is
            opened instead.

    Returns:
        A ``collections.Counter`` mapping token -> occurrence count, where
        tokens come from splitting the file content on runs of non-word
        characters.  An empty-string token is produced (and counted) when
        the content starts or ends with a non-word character.

    Raises:
        IOError: if neither the path nor the ``'.txt'`` variant can be opened.
    """
    if not os.path.exists(test_filepath):
        test_filepath += '.txt'

    with open(os.path.abspath(test_filepath), 'r') as gold_file:
        content = gold_file.read()

    return collections.Counter(re.split(r'\W+', content))
...
...
...
def tokens_to_hist_extractor(extract, data_filepath):
    """Build the token histogram of an extractor's output for one file.

    Args:
        extract: callable taking a file path and returning an object with a
            ``read()`` method (e.g. ``open``).
        data_filepath: path handed to ``extract``.  If that path does not
            exist, ``'.txt'`` is appended first.

    Returns:
        A ``collections.Counter`` mapping token -> occurrence count, where
        tokens come from splitting the extracted content on runs of
        non-word characters.

    Raises:
        IOError: re-raised from ``extract``/``read`` after printing the path
            that failed.
    """
    # Progress output: the path as requested by the caller.
    print(data_filepath)

    if not os.path.exists(data_filepath):
        data_filepath += '.txt'

    try:
        source = extract(data_filepath)
        try:
            content = source.read()
        finally:
            # Release the handle; the previous version leaked it.
            close = getattr(source, 'close', None)
            if close is not None:
                close()
    except IOError:
        # Show the path that was actually tried before propagating.
        print(data_filepath)
        raise

    return collections.Counter(re.split(r'\W+', content))
...
...
def take_measurements(prep_data):
    """Attach precision/recall/accuracy/F1 measurements to every entry.

    Args:
        prep_data: dict whose values are dicts holding at least the keys
            ``'testpath'``, ``'universepath'`` and ``'goldpath'``.

    Returns:
        The same ``prep_data`` object; each value dict gains a
        ``'measurements'`` key holding the result of ``calc_praf``.
    """
    # Only the value dicts are needed; .values() works on Python 2 and 3.
    for val in prep_data.values():
        content = tokens_to_hist_extractor(
            open, os.path.abspath(val['testpath']))
        universe = tokens_to_hist_from_universe(
            os.path.abspath(val['universepath']))
        goldstd = tokens_to_hist_goldstd(os.path.abspath(val['goldpath']))

        val['measurements'] = calc_praf(goldstd, content, universe)

    return prep_data
...
def _strip_known_extensions(name, extensions):
    """Remove any trailing run of the given extensions from ``name``."""
    stripped = True
    while stripped:
        stripped = False
        for ext in extensions:
            if ext and name.endswith(ext):
                name = name[:-len(ext)]
                stripped = True
    return name


def prepare_for_measurements(gold_directory,
                             test_directory=None,
                             goldfile_ext='.txt',
                             testfile_ext='.html',
                             universe_ext='.html',
                             pickle_output_name=None):
    '''Map each base file name in ``gold_directory`` to the paths to measure.

    Base names are the entries of ``gold_directory`` with any trailing
    ``goldfile_ext`` / ``testfile_ext`` / ``universe_ext`` removed (only at
    the end of the name, so an extension appearing mid-name is kept).

    Args:
        gold_directory: directory holding the gold-standard and universe
            files; its last two path components are reported as
            ``'domain'`` and ``'year'``.
        test_directory: directory holding the extractor output.  When
            ``None`` the gold directory is used (this case used to crash
            in ``os.path.join``).
        goldfile_ext: extension appended to build ``'goldpath'``.
        testfile_ext: extension appended to build ``'testpath'``.
        universe_ext: extension appended to build ``'universepath'``.
        pickle_output_name: currently unused; kept for compatibility.

    Returns:
        A dict ``{name: {'domain', 'year', 'goldpath', 'universepath',
        'testpath'}}``.  No files are read here; the histograms and
        measurements are computed separately from these paths.
    '''
    if test_directory is None:
        test_directory = gold_directory

    extensions = (goldfile_ext, testfile_ext, universe_ext)
    filenames = set(_strip_known_extensions(name, extensions)
                    for name in os.listdir(gold_directory))

    domain = os.path.split(os.path.split(gold_directory)[0])[1]
    year = os.path.split(gold_directory)[1]

    return {
        name: {
            'domain': domain,
            'year': year,
            'goldpath': os.path.join(gold_directory, name + goldfile_ext),
            'universepath': os.path.join(gold_directory, name + universe_ext),
            'testpath': os.path.join(test_directory, name + testfile_ext),
        }
        for name in filenames
    }
# Driver: for every extractor result folder under wcbe_path, measure each
# domain/year folder against the dataset folder at the same index and
# pickle the per-folder results next to the extractor output.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'

# Two levels down from each root: <root>/<domain>/<year>.
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:

    testset_folders = listsubdir(listsubdir([wcbe_test]))

    linkquotatest = []

    # NOTE(review): gold and test folders are paired purely by position, so
    # both trees must list the same domain/year folders in the same order.
    for i, testset_folder in enumerate(testset_folders):

        prep_data = prepare_for_measurements(goldset_folders[i], testset_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Context manager so the results file is flushed and closed per run.
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
[About 19 more lines. Double-click to unfold]
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack « Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
Traceback (most recent call last):
File "<pyshell#27>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#26>", line 110, in take_measurements
universe = tokens_to_hist_from_universe(os.path.abspath(val['universepath']))
File "<pyshell#26>", line 42, in tokens_to_hist_from_universe
with open(data_filepath,'r') as data_file:
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\dataset\\www.foxnews.com\\2010\\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.html'
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom.txt.txt
Traceback (most recent call last):
File "<pyshell#28>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#26>", line 108, in take_measurements
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
File "<pyshell#26>", line 93, in tokens_to_hist_extractor
content = extract(data_filepath).read()
File "<pyshell#26>", line 108, in <lambda>
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\www.foxnews.com\\2010\\Family, Friends Launch Online Campaign to Find Missing Utah Mom.txt.txt'
[About 3890 more lines. Double-click to unfold]
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'  # root of the extractor outputs under test; each direct subfolder is processed as one test run below
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'  # dataset root whose nested subfolders serve as the gold set
...
... goldset_folders = listsubdir(listsubdir([test_dir]))  # listsubdir applied twice -- presumably descends two directory levels (helper not shown here; confirm)
... wcbe_subdirs = listsubdir([wcbe_path])  # one level only: the per-extractor folders
...
... for wcbe_test in wcbe_subdirs:  # NOTE(review): indentation was lost in this transcript; the nesting of the following lines has to be inferred
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))  # same two-level listing as for the gold set
...
... linkquotatest = []  # collects one take_measurements() result per folder pair
...
... for i in range(len(testset_folders)):  # pairs gold and test folders purely by index -- assumes both lists are in the same order and goldset_folders is at least as long; TODO confirm
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')  # NOTE(review): the earlier traceback in this session failed on a '...txt.txt' path -- possibly the extension being appended to a name that already ends in .txt; confirm in prepare_for_measurements
... prep_data = take_measurements(prep_data)  # this is the call that raised the IOError in the earlier run (line 16 of that input)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))  # writes results.pkl into the wcbe_test folder; the file object returned by open() is never explicitly closed
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt.txt
Traceback (most recent call last):
File "<pyshell#29>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#26>", line 108, in take_measurements
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
File "<pyshell#26>", line 93, in tokens_to_hist_extractor
content = extract(data_filepath).read()
File "<pyshell#26>", line 108, in <lambda>
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com\\2010\\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt.txt'
# Measure every extractor output directory under wcbe_path against the gold
# dataset under test_dir and pickle the collected measurements into a
# 'results.pkl' file inside each extractor directory.
#
# NOTE(review): the session transcript dropped all indentation. The nesting
# below is reconstructed from the data flow: linkquotatest is reset per
# wcbe_test and results.pkl is written into wcbe_test, so the dump is assumed
# to belong to the outer loop -- confirm against the original script.
wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'

# listsubdir is applied twice, i.e. folders two levels below each root
# (presumably <domain>/<year>, judging by the printed paths -- verify).
goldset_folders = listsubdir(listsubdir([test_dir]))
wcbe_subdirs = listsubdir([wcbe_path])

for wcbe_test in wcbe_subdirs:
    testset_folders = listsubdir(listsubdir([wcbe_test]))
    linkquotatest = []

    # Gold and test folders are paired purely by list position, so both
    # listings must come back in the same order; a test set with more folders
    # than the gold set still raises IndexError here, as before.
    for i, testset_folder in enumerate(testset_folders):
        prep_data = prepare_for_measurements(goldset_folders[i], testset_folder, testfile_ext='.txt')
        prep_data = take_measurements(prep_data)
        linkquotatest.append(prep_data)

    # Close the results file deterministically instead of relying on garbage
    # collection of the anonymous handle (matters on Windows, where an open
    # handle blocks later rename/delete of the file).
    with open(os.path.join(wcbe_test, 'results.pkl'), 'wb') as results_file:
        pickle.dump(linkquotatest, results_file)
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence.txt.txt
Traceback (most recent call last):
File "<pyshell#30>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#26>", line 108, in take_measurements
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
File "<pyshell#26>", line 93, in tokens_to_hist_extractor
content = extract(data_filepath).read()
File "<pyshell#26>", line 108, in <lambda>
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\BodyTextExtractorFilter\\www.foxnews.com\\2010\\Bomber Fooled CIA, Family, Jordanian Intelligence.txt.txt'
[About 3912 more lines. Double-click to unfold]
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt.txt
Traceback (most recent call last):
File "<pyshell#31>", line 16, in <module>
prep_data = take_measurements(prep_data)
File "<pyshell#26>", line 108, in take_measurements
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
File "<pyshell#26>", line 93, in tokens_to_hist_extractor
content = extract(data_filepath).read()
File "<pyshell#26>", line 108, in <lambda>
content = tokens_to_hist_extractor(lambda x: open(x), os.path.abspath(val['testpath']))
IOError: [Errno 2] No such file or directory: 'c:\\crawlToTheFuture\\crawl-to-the-future\\testing\\wbce-tests\\ContentCodeBlurringFilter\\entertainment.msn.com\\2010\\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt.txt'
[About 4572 more lines. Double-click to unfold]
>>> wcbe_path = 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'
... test_dir = 'c:/crawlToTheFuture/crawl-to-the-future/dataset/'
...
... goldset_folders = listsubdir(listsubdir([test_dir]))
... wcbe_subdirs = listsubdir([wcbe_path])
...
... for wcbe_test in wcbe_subdirs:
...
... testset_folders = listsubdir(listsubdir([wcbe_test]))
...
... linkquotatest = []
...
... for i in range(len(testset_folders)):
...
... prep_data = prepare_for_measurements(goldset_folders[i], testset_folders[i], testfile_ext='.txt')
... prep_data = take_measurements(prep_data)
... linkquotatest.append(prep_data)
...
...
... pickle.dump(linkquotatest,open(os.path.join(wcbe_test,'results.pkl'),'wb'))
[About 19 more lines. Double-click to unfold]
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Houston Family on Lifetime’s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Motley Crue Movie ‘The Dirt’ Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\‘The Simpsons’ Headed to ‘Minecraft’.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\‘American Sniper’ Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\‘The Interview’ Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractor2Filter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\BodyTextExtractorFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\ContentCodeBlurringFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\DocumentSlopeCurveFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorDomFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\FeatureExtractorSplitFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\GeneralCCB\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\KFeatureExtractorDomFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_entertainment_citysearch.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2000\20001206091000_entertainment_msn_com_holiday_tv_asp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_movies_hotgossip.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_553499.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_543674.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005MSN - News - Going Batty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005beyond the cape and the cowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005MSN - News - ON SET Spielberg, Cruise Wage Realistic 'War'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050708003338_entertainment_msn_com_movies_article_aspx_news_194668.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050809075007_movies_msn_com_movies_filmfashion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005Movies -- Adult Fairy Tales_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_dvd_extras.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050525031445_movies_msn_com_movies_summermovieguide_starwarstop10.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20051125082803_entertainment_msn_com_movies_hitlist_11_22_05.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050512080141_entertainment_msn_com_celebs_article_aspx_news_190627.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20051217084007_entertainment_msn_com_movies_dvd_gay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_artistofthemonth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050715084018_entertainment_msn_com_movies_comedies_ultimatemoviewedding.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050520085141_entertainment_msn_com_music_hotgossipB.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_545751.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_tv_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005creepykids_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_genre_aspx_genre_Family.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005MSN - News - ABC Renews 'Housewives,' 'Lost'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005MSN - News - 'Ring Two' is Just Another Sequel_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20051212142307_entertainment_msn_com_music_hotgossipc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\2005MSN - News - Bridging the Generation Gap_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20050525031445_entertainment_msn_com_movies_movie_aspx_m_562330.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2005\20051125173813_entertainment_msn_com_movies_hotgossipb5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100821104840Mom & Pop Culture_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100818062626A Conversation with Christian Jacobs_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20101119161316_movies_msn_com_the_rundown_deathlyhallows_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100628220323_gameon_msn_com_articles_4900_E3_2010_Harry_Potter_and_the_Deathly_Hallows_x2013_Part_1_Videogame_Preview_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_music_newthisweek_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100903102232Reasons 'Scott Pilgrim vs_ the World' Tanked_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100417063002Despite Rumors, No New 'Spider-Man' Yet_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100419174756Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100807220743_gameon_msn_com_articles_5476_SDCC_10_The_Detail_of_DC_Universe_Online.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100429181007_gameon_msn_com_articles_4643_Super_Mario_Galaxy_2_Updated_Hands_on_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100417063002Alyssa Milano Gets 'Challenged'_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100916063351_movies_msn_com_mom_pop_culture_teen_flicks_classic_literature_photo_gallery_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100429181007_paralleluniverse_msn_com_features_tv_tales_from_the_fringe_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_mom_pop_culture_jonas_brothers_jonas_la_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100628220323_tv_msn_com_entourage_movie_debate_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_mom_pop_culture_chris_colfer_glee_story_interview__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20101003105944Goodnight, Sweet American Prince_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100628220323_paralleluniverse_msn_com_features_movies_green_hornet_trailer_story__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100316012929_movies_msn_com_teen_idols_photo_gallery_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100429181007_tv_msn_com_american_idol_midseason_report_card_story_feature__html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20101115094204127 Hours (2010)_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_mom_pop_culture_kidz_bop_mtv_artists_story_feature_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100916063351_music_msn_com_sarabareilles_interview_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20101007212328Catching Up To 'Star Wars' Franchise_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2010\20100916063351_tv_msn_com_tv_2010_mtv_vmas_swift_song_of_forgiveness_story_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Houston Family on Lifetime�s.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Analyst Greg Anthony suspended.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Fox Mulls More '24'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\How Many Celebrities Can You Pick Out.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Eva Longoria Signs On To Star In NBCs Telenovela.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\John Boehner Uses Taylor Swift GIFs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\2015 Critics' Choice Awards Michael Strahan Strips, Emily Blunt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Motley Crue Movie �The Dirt� Revived at Focus Features.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\�The Simpsons� Headed to �Minecraft�.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Gary Sinise Cast as Lead.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Marlboro Man Darrell Winfield Dead at 85 Wyoming Rancher Was First.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Lawyer Cosby wasn't in Los Angeles.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Oscars American Sniper Hits Target With Academy.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Citizen Kane Set for First-Ever.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Bill Cosby will perform in Colorado despite planned protests.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\NBC Developing Dolly Parton TV Movies.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Adele's new album delayed again.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Best-seller about journey to heaven is pulled.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Jennifer Lawrence, Francis Lawrence.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\�American Sniper� Earns Record-Breaking $30.5 Mil Friday.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Exclusive Grateful Dead to Reunite.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\TV Review Lifetimes Whitney.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Lea Michele, Joe Manganiello.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\�The Interview� Will Lose $30 Million.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\entertainment.msn.com\2015\Ann Curry To Depart NBC News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781513_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780947_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_743000_743718_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781199_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_europe_newsid_782000_782079_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_781000_781611_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782148_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_780000_780815_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_south_asia_newsid_781000_781659_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_136000_136248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782000_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_782000_782183_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_782000_782099_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_politics_newsid_780000_780486_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_asia_pacific_newsid_782000_782176_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_781000_781344_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_world_africa_newsid_782000_782212_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_780000_780980_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781834_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_782000_782032_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_health_newsid_781000_781089_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_business_newsid_781000_781964_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_sci_tech_newsid_778000_778274_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000619183245_news_bbc_co_uk_hi_english_business_newsid_469000_469640_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2000\20000608054457_news_bbc_co_uk_hi_english_uk_newsid_781000_781858_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_entertainment_4609819_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4593223_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050829231107_news_bbc_co_uk_1_hi_business_4193946_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_health_4607233_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_from_our_own_correspondent_4400865_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4602739_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_programmes_click_online_4398243_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_middle_east_4405337_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4610655_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_europe_4310789_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_science_nature_4399323_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_middle_east_4592237_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_business_4399537_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_technology_4607203_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_asia_pacific_4403591_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_science_nature_4610761_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610729_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_uk_news_4610755_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_europe_4610607_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_americas_4394561_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_talking_point_2806153_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4404661_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_business_4606197_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050605075721_news_bbc_co_uk_2_hi_americas_4608949_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2005\20050403061002_news_bbc_co_uk_2_hi_south_asia_4401645_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_7485331_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522746_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523125_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523325_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523182_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8523196_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523003_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522471_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522434_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8509333_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522732_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523248_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522268_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8523034_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522934_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8522001_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523289_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8523319_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_asia_pacific_8523328_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8521332_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8519807_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8522921_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_americas_8510900_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_business_8522606_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2010\20100219052801_news_bbc_co_uk_2_hi_africa_8522039_stm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Poor sleep 'early warning sign' for drink and drug issues.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - How do you get women into the workplace.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Lost Beagle2 probe found 'intact' on Mars.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Iranian paper shut over Clooney 'Je Suis Charlie' photo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - 2014 warmest year on record, say US researchers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC Sport - Jose Mourinho Chelsea manager praises 'perfect game' in Swansea.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Mexico missing students Guerreros Unidos gang member arrested.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Paris attacks Gunman Said Kouachi given unmarked grave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Afghan cabinet nominee on Interpol's most-wanted list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC Sport - Afcon 2015 Congo coach LeRoy angered by transport delay.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Is cheap oil good or bad for India Explained in 90 seconds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - CDC boss Zero Ebola cases possible in West Africa.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Pope Francis cuts short visit to typhoon-hit Tacloban.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - China boat capsize on Yangtze river kills 22.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Brazil 'outraged' by Indonesia drug trafficking execution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Yemen Shia Houthis seize president's chief of staff.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Greece arrests over Belgian 'jihadist terror plot'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Eurotunnel expects to resume some services later.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - How do immigrants forge a sense of identity.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Hackers on Blackhat Hollywood finally gets internet right.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Stress is 'barrier to feeling empathy for strangers'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo Niger protesters set churches on fire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - What's involved in cyber war games.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Charlie Hebdo 'Islamist cyber attacks' hit France.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.bbc.co.uk\2015\BBC News - Oscars head Isaacs calls for more diversity after row.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_bs_ford_earns_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_ap_history_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_hl_mammograms_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_hl_alternative_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wr_tech_mpcom_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_od_sex_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_bs_media_chriscraft_dc_10_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_ts_campaign_leadall_dc_41_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_pl_campaign_gore_dc_161_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_pl_campaign_bush_dc_295_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_re_britain_lennon_dc_9_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sp_al_newyork_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_wr_ntt_verio_dc_3_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_pl_campaign_atheist_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001017_ts_campaign_debate_dc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_wl_philippines_leadall_dc_8_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_tc_emc_earns_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_od_writers_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_sc_space_gammaray_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_nm_20001018_ts_markets_stocks_dc_11_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_wl_russia_submarine_dc_5_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_tc_broadcom_dc_4_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000814_re_leisure_page_dc_1_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20000815053113_dailynews_yahoo_com_h_nm_20000815_sc_cancer_breast_dc_2_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2000\20001018142612_dailynews_yahoo_com_h_abc_ts_story_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050731011340_news_yahoo_com_s_ap_20050730_ap_on_re_mi_ea_islam_and_constitution.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_eo_20050820_en_celeb_eo_17195.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_krwashbureau_20050830_ts_krwashbureau__galloway_column.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050819_ca_pr_on_sc_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_shuttle_departure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050817_ca_pr_on_sc_primate_protest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_thailandsouthunresttelecoms_050825104952.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_space_20050819_sc_space_scientistsmesswiththespeedoflight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050414035058_us_rd_yahoo_com_dailynews_fp_hist__story_news_yahoo_com_news_tmpl_story_u__ap_history.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050825221638_news_yahoo_com_s_sn_20050825_sp_sn_notebookyoungtalentgetsitsdue.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_nm_20050710_bs_nm_airlines_united_flightattendants_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20051025090737_adventures_yahoo_com_b_adventures_adventures1245.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050414035058_news_yahoo_com_news_tmpl_story_e_1_u__ap_20050414_ap_on_re_us_eric_rudolph_sid_84439559.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_milestogo.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050711080413_news_yahoo_com_s_ap_fake_research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_weeklystandard_20050816_cm_weeklystandard_oilspushandpull.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050901182436_news_yahoo_com_s_nm_20050901_od_nm_japan_doctor_dc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050821_ca_pr_on_sc_space_shuttle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_cpress_20050818_ca_pr_on_sc_wild_america.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_acs_20050818_hl_acs_studies_show_benefits_of_mammograms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050825195417_news_yahoo_com_s_afp_20050825_tc_afp_indiamotorolaus_050825131133.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050821_wl_afp_vaticanpopeyouth_050821215405.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_csm_20050819_ts_csm_apastorcorp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_afp_20050818_tc_afp_singaporeinternet.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2005\20050822064114_news_yahoo_com_s_latimests_20050821_ts_latimes_popeattacksrootsofterror.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_he_me_us_med_swine_flu.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_toys_lead_violations.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_re_eu_eu_latvia_town_for_sale.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_he_me_us_health_care_government_role.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_bi_ge_us_tec_toyota_car_electronics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_hi_te_us_books_amazon_macmillan.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_eu_eu_spain_teenage_matador.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_mu_us_haiti_bet_concert.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_google_book_battle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_pete_wentz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_go_pr_wh_us_obama_cooperation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_afp_20100206_lf_afp_lifestyleskoreatourismleisure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota_the_road_to_recalls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_sc_us_global_warming_pika.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_michael_jackson_doctor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_bi_ge_us_fda_dissolvable_tobacco.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_hi_te_us_tec_at_t_iphone_slingplayer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100204_ap_on_hi_te_us_pregnancy_campaign_text_messages.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_obama_jobs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_on_sc_eu_russia_space_station.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100205_ap_en_tv_us_people_charlie_sheen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_us_toyota.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_bi_ge_cn_finance_meetings.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_on_re_us_us_military_medals_impostors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2010\20100207053329_news_yahoo_com_s_ap_20100206_ap_en_ot_us_super_bowl_jessica_alba.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Sister Sheds Light on Relationship of Runaway Teen Duo - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Prosecutor Mother set newborn on fire on New Jersey road - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Police 2 dead, 1 wounded in central Florida mall shooting - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Boko Haram survivor told not to search for children, 'we killed them' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Sprint just stuck a dagger into AT&T�s net neutrality argument.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Washington Parents Using Marijuana Accused of Child Abuse - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Istanbul suicide bomber was 'teen widow of Norwegian IS jihadist' - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Sitting Clay Matthews was a Pete Carroll error - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\AP Exclusive Academy president responds to Oscar firestorm - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\The impossible has happened Washington has too much marijuana.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Man takes hostages in post office near Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Thousands see off Chadian troops to fight Boko Haram - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Charlie Hebdo increases print run to 7 million to keep up with international demand - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Kentucky teens on the run spotted panhandling in Florida Are they ready to surrender - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Deadly anti-Charlie Hebdo riots as France defends free speech - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\California girls' basketball coach suspended after 161-2 win - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Myanmar woman screams innocence before Saudi beheading video - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Husband injures wife, kills man, self in Florida mall - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Farms can be held liable for pollution from manure U.S. court - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Dying Oklahoma inmate's last words stir questions - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\130-year-old 'gun that won the West' found in US park - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Detective Teen confessed to killing teacher but denied rape - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Treasure Hunters Find Mysterious Shipwreck in Lake Michigan - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Russia could soon run multiple Ukraine-sized operations U.S. general - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\news.yahoo.com\2015\Body found at California desert resort is missing AIG exec police - Yahoo News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001109100100_www_thenation_com_about_privacy_mhtml.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_scheer_s_20001107.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000609001528Honor the Blacklistees_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000706191801Gore's Oil Money_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000615030505Lennon's M15-FBI Files_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000620230325Microsoft's Fatal Error_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000511124527The Details of Life_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000711033021'Spies' Under the Persian Rug_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000609051234LAPD Law and Disorder_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001122014700_www_thenation_com_doc_mhtml_i_20001120_s_greider.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000706212709Feingold on the Death Penalty_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001120_s_letter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000815074421Death Row Roll Call_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001109100100_www_thenation_com_deathrow_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_pollitt.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000616155438Microsoft Judgment Day_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001106_s_hitchens.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000614172439America's Debt to Blacks_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000609230420Who Is Putin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20001109100100_www_thenation_com_doc_mhtml_i_20001023_s_scheer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000615183926The Secret History of Lead_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000711042145How a Caged Bird Learns to Sing_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000622092949Circus Minimus_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000608024311American Beauty or American Pie_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2000\20000612135502Justice for Bernard Baran_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050923075743_www_thenation_com_doc_20050919_blumenthal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050830012805_www_thenation_com_doc_20050912_houppert_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050828223608_www_thenation_com_doc_20050829_klein_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050831121004_www_thenation_com_doc_20050815_drmarc_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050917160111_www_thenation_com_doc_20050926_featherstone_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20041220_s_bletters.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050829191227_www_thenation_com_doc_20050829_scheer0824_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050325100759_www_thenation_com_doc_mhtml_i_20050411_s_greider_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_19461214_s_fisher.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050912_kim_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050801_cortright_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050829004813_www_thenation_com_doc_20050912_alterman_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050322085915_www_thenation_com_doc_mhtml_i_20050328_s_lazare_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050831111829_www_thenation_com_doc_20050829_ruskin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050828003452_www_thenation_com_doc_20050829_pollitt_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050829_segura_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050314084035_www_thenation_com_doc_mhtml_i_20050321_s_reading_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050103_s_deresiewicz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050104083617_www_thenation_com_edcut_index_mhtml_bid_7_pid_2097_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050419050730_www_thenation_com_doc_mhtml_i_20050425_s_stephanson_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_kimmerling.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050217015917_www_thenation_com_doc_mhtml_i_20050221_s_vest_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050104083617_www_thenation_com_doc_mhtml_i_20050110_s_eaton.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050129085245_www_thenation_com_doc_mhtml_i_20050214_s_solomon_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2005\20050830071016_www_thenation_com_doc_20050912_kaminer_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154481_right_angle_reid_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155833_tea_party_takeaways_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_154004_afternoon_delight_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100609205420_www_thenation_com_blog_kucinich_israeli_assault_gaza_relief_ship_there_must_be_consequences_such_conduct_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156382_antichoicers_march_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100812195010_www_thenation_com_blog_153951_women_taliban_and_time_cover_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100903195802_www_thenation_com_audio_154226_breakdown_do_presidential_speeches_matter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154014_obama_right_and_race_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101105013110_www_thenation_com_blog_cia_leak_case_comes_hollywood_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101105013110_www_thenation_com_article_155835_fame_bee_dick_cavett_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100813184322_www_thenation_com_blog_153893_same_sex_marriage_now_2010_issue_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156384_obama_without_tears#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156526_giving_season_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100903195802_www_thenation_com_article_154484_china_drivers_seat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101105013110_www_thenation_com_article_judging_elena_kagan_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100609205420_www_thenation_com_article_vilification_helen_thomas_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101201035509_www_thenation_com_blog_156741_pentagon_confirms_only_argument_remains_dont_ask_dont_tell_bigotry.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100609205420_www_thenation_com_article_free_gaza#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156698_gearing_canc.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101128210129_www_thenation_com_article_156647_tsastroturf_washington_lobbyists_and_koch_funded_libertarians_behind_tsa_scandal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100813184322_www_thenation_com_article_154017_corpo_obama_geithner_petraeus_state_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101202133847_www_thenation_com_article_156755_wikileaks_vs_empire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20101115084345_www_thenation_com_article_156429_colored_girls_tyler_perrys_film_enuf_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100812195010_www_thenation_com_article_38008_wedge_against_tyranny#comment_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2010\20100903195802_www_thenation_com_blog_154104_how_media_created_monster_sarah_palin_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Obama�s Right to Embrace the Public Option for Closing the Digital Divide The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\What Americans Should Do After Charlie Hebdo The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Is Hawai�i an Occupied State The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Big Energy Says the Future Is Bright and Full of SUVs for Everyone The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Egypt�s Revolutionary Artist Takes On the NYPD The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Why It�s OK for �The New York Times� to Not Be Charlie The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\�She's Beautiful When She�s Angry� Reveals the Radical Ordinary Women of 1960s Feminism The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\January 16, 1919 The 18th Amendment Is Ratified, Prohibition of Alcohol Becomes Law The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Honor King�s Legacy by Protecting Voting Rights The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\When Free Doesn�t Mean Fair for Community College The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Stephen Cohen It�s 1983 All Over Again in Ukraine The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\What �The Interview� Gets Right�and Wrong�About US Policy Toward North Korea The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\As Top Democrats Embrace a Robin Hood Tax, It�s Time for Activists to Go Big The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Judge Rules That Home Care Workers Are Really Just �Companions� The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\After 86 Years the Palestinian National Soccer Team Finally Arrives The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\January 17, 1961 President Eisenhower Warns of the �Military-Industrial Complex� in His Farewell Address The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\The Lower Depths The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Can Podemos Win in Spain The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\The Iraq War Strategy 3.0 If You Can�t Win, At Least Make Money The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Poor Guatemalans Are Taking On North American Mining Companies�and Have the Bullet Wounds to Prove It The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Did Obama Just Introduce a �Public Option� for Higher Education The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\New Year, Same National Youth Groundswell The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\We�ll Need an Economic Program to Make #BlackLivesMatter. Here Are Three Ideas. The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Katrina vanden Heuvel We�re Already Losing the Lessons of the Paris Unity March.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\thenation.com\2015\Perhaps the Most Important Question About the Democratic Party Right Now The Nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Grounded and grumpy How to prevent flight delays from spoiling summer travel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_president_election_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_interactive_legal_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_disenfranchised_voters_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Northern lights join meteors in dazzling sky display.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001110195651_www_cnn_com_2000_ALLPOLITICS_stories_11_10_election_president_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Bad weather hampers Russian submarine rescue - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Tropical Storm Beryl bears down on Texas-Mexico border - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204140149_www_cnn_com_2000_ALLPOLITICS_stories_12_03_karl_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_transition_funds.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\China noncommittal on Arafat's timetable for Palestinian state - August 12, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_florida_legislature_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\North and South Korea prepare for first family reunions since 1985 - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Diabetes patients, researchers temper hope for quick cure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_newmexico_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\CNN AllPolitics_com - Election 2000 - The Democratic National Convention.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_bush_hipsurgery_ap_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\CNN_com - Convention opens to protests, rubber bullets - August 15, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204140132_www_cnn_com_2000_ALLPOLITICS_stories_12_02_potter_debrief_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Gore, in St_ Louis, says Bush would put U_S_ back in record red ink - August 14, 2000.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Titans rout Rams 30-3 in rematch of Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Thompson turns back Torres in record 100 free.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\20001204202200_www10_cnn_com_2000_ALLPOLITICS_stories_12_04_recount_wrap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2000\Another strong solar flare heads toward Earth.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_US_Careers_06_30_background_check_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_pit_bulls_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_pygmy_elephants_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_SHOWBIZ_Movies_11_04_review_jarhead_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_SHOWBIZ_Music_09_01_katrina_fatsdomino_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_POLITICS_09_01_bush_katrina_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_WORLD_europe_07_01_italy_cia_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_meast_11_06_iran_nuclear_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_internet_07_01_internet_intelligence_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_LAW_11_05_klan_bomber_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_09_01_katrina_warming_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_ozone_layer_reut_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_royal_visit_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_HEALTH_conditions_11_02_discala_eating_disorder_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_POLITICS_07_01_resignation_next_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_US_11_06_katrina_soldier_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_HEALTH_08_31_katrina_medical_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TRAVEL_DESTINATIONS_08_31_72_holes_long_island_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TRAVEL_07_01_flight_delays_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_WORLD_meast_09_01_lebanon_arrests_syria_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20051107021244_www_cnn_com_2005_WORLD_europe_11_06_france_riots_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_TECH_science_07_01_green_taxis_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_TECH_science_08_31_chimp_genes_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050901192556_www_cnn_com_2005_LAW_08_31_aruba_missing_teen_ap_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2005\20050702081747_www_cnn_com_2005_SHOWBIZ_Music_07_01_live8_japan_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_wayoflife_02_02_mf_famous_smoking_measures_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_europe_02_02_france_concorde_trial_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_HEALTH_02_02_pills_medication_compliance_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_HEALTH_10_13_company_funded_research_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_africa_02_10_safrica_orphanage_fire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_diane_watson_retire_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20101014064028_www_cnn_com_2010_LIVING_09_16_cnnheroes_tribute_show__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_POLITICS_02_10_charlie_wilson_obit_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_02_twitter_phishing_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_space_02_02_nasa_constellation_reaction_index_html_hpt_C2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_meast_02_02_iran_protest_sentences_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_WORLD_americas_02_03_haiti_border_arrests_index_html_hpt_C1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_OPINION_02_02_ensler_TED_talk_girl_power_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_11_iran_revolution_anniversary_index_html_hpt_T1.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20101014064028_news_blogs_cnn_com_2010_10_13_last_miner_out_is_groups_captain__hpt_T2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_jerusalem_road_discovery_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_SPORT_02_10_iran_olympic_skier_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_meast_02_10_iran_information_blockade_index_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_asiapcf_02_08_vbs_north_korea_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_HEALTH_expert_q_a_02_09_bipolar_schizophrenia_similar_raison_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_marquee_blogs_cnn_com_2010_02_10_idols_kara_dioguardi_keeps_it_to_herself__hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_TECH_02_01_ipad_apps_apple_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100211150102_www_cnn_com_2010_WORLD_americas_02_09_cuba_15_index_html_hpt_Mid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_POLITICS_02_01_us_pentagon_review_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2010\20100203121620_www_cnn_com_2010_LIVING_personal_02_02_o_when_to_hold_grudge_index_html_hpt_Sbin.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Obama tax plan Middle-class credits, increases for rich - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Like life there, cockfighting in Cuba is a gray area - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\The truth is still out there Fox eyes 'X-Files' reboot - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Traffic lights on your windshield could get you home faster - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Icy roads cause hundreds of accidents from Philly to New York and Connecticut - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\No charges after Oklahoma police chief shot four times - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Penn State still doesn't get it - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Malawi Deadly floods menace tens of thousands - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Pope Francis Millions gather in Manila for Mass - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\ISIS militants free hundreds of Yazidis in Iraq, local official says - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\2014 was Earth's hottest year on record - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Israeli strike kills son of top Hezbollah commander - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Cuba's money makers expect good times as Obama eases restrictions - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Muslim 'angels' ease American's prison days in UAE - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\How to talk to kids about racism - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Boko Haram strikes in Cameroon as foreign troops arrive from Chad - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\UK's Beagle 2 lander spotted on Mars - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\'Sea of Blue' rally backs police - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Why Obama's going to India after his State of the Union speech - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Europe terror threat Greece aids Belgium with investigation - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Fitness expert Greg Plitt dies after being struck by train - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Astronaut set to spend one year in space - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Arsenal sweep Man City aside - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Miss Lebanon distances herself from photo with Miss Israel - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.cnn.com\2015\Delaware shooting occurs near unoccupied Biden home - CNN.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Books.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Sedaris - Common Relationship Misconceptions.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Money Green - How to Pick the Right Basket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Style EsQ&A.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Money - Stick A Fork In It.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Feature Story eBook Edition.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Feature Story - Bill Clinton The Exit Interview.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Money Green - Three Ways to Be a Pessimist 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Sedaris - The Youth in Asia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Feature Story - The Old Lady Down the Hall 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Dubious Achievements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Style EsQ&A - When Cuffs Meet Business Casual.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Sedaris - Jesus Shaves 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Sedaris - Giant Dreams, Midget Abilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Money - The Root of the Matter.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Women Women We Love Gallery - Carrie-Anne Moss.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2000\Esquire Money Green - The Master's Voice 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part 1).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Story of Cadillac Man and the Land of the Lost Souls.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire's Howard Stern, American.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Dear Mr_ President, Here's How to___ 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Mrs_ Hughes Takes Her Leave.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Art in Commerce.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Best New Restaurants, 2005.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Features What I've Learned Charles H_ Townes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story My Father's Fashion Tips.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Giveaway John Mayer's Songwriting Contest.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Who the Hell Is Ronnie Earle.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Man Who Retired 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Poolside with Yoda.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The American Dream.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story The Making of the Twenty-First-Century Soldier (Part1) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2005\Esquire Feature Story Gary Webb, 1955 - 2004.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Carol Bartz Bio - Quotes by Yahoo CEO Carol Bartz - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Rochelle Aytes Picture - Hot Pic of Rochelle Aytes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Charles Schwab Interview - Charles Schwab Quotes - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Peter Orszag Interview - Budget Director Peter Orszag on Health Care - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Republican Response to Gulf Oil Spill 2010 - Are Republicans Losing Their Minds - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Lost Season 6 Episode 5 Recap - Lost Daddy Issues - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Benefits of Quitting - Things to Give Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Leonardo DiCaprio Quotes - Leonardo DiCaprio Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Scott Brown Jobs Bill - Fake Underdogs in Washington and Hollywood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Jersey Shore Porno War - Taryn Thomas on Jersey Shore XXX - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Producer David Brown Dead - David Brown Quotes in Esquire Interview - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Future of Torture Debate - John Richardson on Justice Dept Torture Reports - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\David Lynch Style Icon � Director David Lynch Style - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Future of Private Celebrities - Why Famous Recluses Are the New Superstars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Cinco de Mayo Beers - Beyond Margarita Recipes for Cinco de Mayo - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Frightened Rabbit Review - Swim Until You Can't See Land Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Boycott Arizona Immigration Law SB1070 - Charles Pierce on States Rights 2010 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Times Square Bomber Arrest - Analysis of Pakistan Link to Times Square Bomber - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\James Beard Awards Predictions - Who Will Win the James Beard Awards - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Healthcare Summit Live on CSPAN - Watch Health Care Summit Live Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Controlled Demolition Inc - Building Demolitions Online - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Raul Grijalva Immigration - Arizona Immigration with Congressman Raul Grijalva - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Fort Hood Shooting - Fort Hood Texas Shooting Report - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2010\Women in a Mans Life - Different Kinds of Women - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Firefighter GoPro Video - Australia Brushfire Video - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Alex Malarkey Recants Story - The Boy Who Came Back to Heaven Book Was Made Up - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Watch Corner Boys - 'SNL' Sets 'The Wire' in Brooklyn - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\The Atrocious, Forgotten Style of the Planet Hollywood Era - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Ferguson Peacekeeper Ron Johnson on What He's Learned - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\A Church Burns In Ferguson - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\'Lego Movie' Snub - 'Lego Movie' Directors React to Oscars - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Longer Work Days Linked to Alcohol Abuse - Alcohol Risks - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\The CIA Has Created A Culture Of Credulous Fear In This Country - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Michael Brown Sr. Interview - Ferguson Shooting Victim's Father - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Kaleb Whitby Crash - Man Survives Being Wedged Between Two Semis - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Round-Up Of The Weekend News Shows_1.19.15 - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Green Bay Packers Settlers of Catan - Packers Bond Over Board Game - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Justin Townes Earle Interview - Justin Townes Earle Talks Fatherhood - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Ethan Hawke Reddit AMA - Ethan Hawke Robin Williams Thoughts - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Can Ernst Capitalize After President Obama's State Of The Union, Unlike Jindal And Rubio Before Her - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Joe Paterno's Wins Restored - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Key and Peele Predict the NFL's Conference Championship Games - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\This Is Not Carson's First Foray Forward, Into The Past - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\The Fire Next Time Dispatch From Ferguson, Missouri - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\New York City At Night - Aerial Photos of New York City - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Remembering Martin Luther King, Jr. And The Others Involved In The Selma March - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Ben Schwartz Life Advice - Ben Schwartz Ask a Comedian - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Larry Wilmore Steps Into the Spotlight - 'The Nightly Show with Larry Wilmore' on Comedy Central - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\The Paul Family And The Fringe Characters They Engage With - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.esquire.com\2015\Elon Musk Test Track for 760-mph Hyperloop on the Way - Esquire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu2_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_feb_0229_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbesglobal_00_0306_0305042a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_fyi_00_0306_086_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu8_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu5_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbesbest_com_b2b.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0320_6507164a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_asap_00_0221_073_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_jul_0706_feat_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_legal_disclaimer_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Feb_0229_mu4_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu10_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_letters_index_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_columnists_swartz.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbes_00_0724_6517145a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_columnists_flint.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_00_Mar_0301_mu1_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_toolbox_billnew.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_conf_.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_tool_html_00_Jul_0706_mu3_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_forbesglobal_00_0703_0313056a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_tool_html_privacy_state_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000301094642_www_forbes_com_forbes_00_0306_6506126a_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_contact.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_fyi_00_0501_108_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2000\20000707001254_www_forbes_com_asap_00_0529_098_htm.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_2005_01_28_cx_pp_0127apprentice_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\Death By Starvation - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954531_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_healthcare_2005_01_12_cx_mh_0112fdaintro_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_free_forbes_2005_0214_060_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805860_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_commerce_2005_01_27_cx_de_0127bubblebowl_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_personaltech_2005_04_18_cx_ld_0418adbe_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0411_068_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_03_cx_pm_0103sneakpeeklandsp05_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_business_2005_01_28_cx_ld_0128mergertable_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_feeds_ap_2005_02_04_ap1805853_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954632_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\Update 4 Boeing, Airbus Announce New Orders at Show - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_feeds_ap_2005_01_28_ap1791317_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_2005_04_18_forbesradio0418_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_feeds_ap_2005_04_18_ap1954310_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_free_forbes_2005_0425_052_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_enterprisetech_2005_02_03_cx_de_0203letter_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050418235008_forbes_com_home_forbes_2005_0425_100_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_17_cx_dl_0117feat_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_manufacturing_2005_01_28_cx_sr_0128gillette_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_columnists_2005_01_28_cz_mf_0128freedmandavos_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050204143419_www_forbes_com_home_business_2005_02_03_cz_0203fullsegments9and10africa_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2005\20050129041123_forbes_com_home_vehicles_2005_01_24_cx_dl_0124feat_ls_html.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_05_23_apple_google_sony_cmo_network_global_reputable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_02_broadband_wifi_telecom_technology_cio_network_wired.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_08_best_worst_jobs_leadership_careers_hiring_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www.forbes.com_forbes_2010_0607_health_probiotics_vitamins_supplements.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_17_best_worst_paying_jobs_leadership_careers_salaries.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_19_college_tuition_risk_public_personal_finance_tuition_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_23_fiduciary_standard_financial_advisor_network_broker_dealer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_30_best_actors_for_the_buck_business_entertainment_actors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_06_10_companies_financial_risk_personal_finance_risk_list.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_07_07_richest_royals_wealth_monarch_wedding_divorce_billionaire.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\ABC returns to Cablevision, but talks go on - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_08_16_stressful_cities_health_lifestyle_real_estate_pressure.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www.forbes.com_2010_05_18_most_overpriced_cars_lifestyle_vehicles_detroit_ford_truck_msrp.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_01_13_best_internships_jobs_leadership_careers_employment_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_sciencebiz_2010_05_in_sleepy_geneva_plotting_to_thwart_a_killer.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_04_01_amazon_apple_netflix_business_fastest_growing_retailers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_coolest_cities_metros_lifestyle_real_estate_cool.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_blogs_forbes_com_beltway_2010_05_18_business_to_eye_iran_sanctions_deal_closely__boxes_Homepagecolumnsblogs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_04_27_cities_jobs_detroit_opinions_columnists_joel_kotkin_html_boxes_Homepagetopspecialreports.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100927030125_www.forbes.com_2010_08_02_best_fans_teams_lifestyle_sports_fandom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_09_28_borrow_lend_personal_finance_money_ethics_responsibility.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100602102224_www_forbes_com_2010_03_24_gold_metals_investing_personal_finance_etf_inflation_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_03_25_apple_tablet_computer_technology_ipad10_land.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\20100930232813_www.forbes.com_2010_05_03_best_paying_jobs_women_salary_forbes_woman_leadership_careers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2010\AIG sells Alico unit to MetLife for $15.5 billion - Forbes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\30 Under 30 Young Scientists Who Are Changing The World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\John Legend And Idina Menzel To Perform At The Super Bowl.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\What Should Data Scientists Know.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Report To Restore Middle-Skills Jobs, Employers, Educators, Policymakers Must Work Together.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Where Big Data Jobs Will Be In 2015.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Jawbone Jumps Into Employee Monitoring - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Could Plug-And-Play Be The Future Of Healthcare Diagnostics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Broken Ladders.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Android Circuit Nobody Loves Lollipop 5.0, Sony's Sad 'For Sale' Sign, Samsung's Galaxy S6 Mistake.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\30 Under 30 The Food & Drink Masters Changing How We Eat.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Why Big Data Has Everything To Do With The Price Of Oil.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Oh Switzerland, What Have You Done p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\What The Rich Are Doing With Their Money In 2015 -- And You Should, Too p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Obamacare Chief Marilyn Tavenner In Her Own Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\FAFSA Follies Getting College Aid.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets - Forbes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Putin Sacks U.S. Trained Economist In Charge Of Ruble At Central Bank - Forbes p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\The Bachelor's Degrees With The Highest Salary Potential.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\How Can Data Scientists Build Trust In Advanced Analytics p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Battered Hedge Fund Billionaire John Paulson Adds To Merger Bets.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.forbes.com\2015\Friday Box Office 'Wedding Ringer,' 'Paddington' Score, 'Blackhat' Bombs p2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Are the Courts Going Bananas An Ape is the Plaintiff!.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Minister Who Held Funeral for Columbine Killer Resigns.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Aspiring Politician on Trial in Slaying of State Senator.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Bawdy in the Bedroom.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\At Least 27 Killed in Spain When Truck Collides With Bus.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Travel The Vertical Village.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Fox411 by Roger Friedman July 6, 2000 'Sopranos' Mother, Golden Globe Junket.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Quick Thinking by Kidnapped 8-Year-Old May Have Saved Her Life.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Incredible Shrinking Doctors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\A New Language From Microsoft.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Paul McCartney Makes New Beatles Record.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Pre-Paid Traffic Tickets Spark National Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\U.S. Gas Prices Down Over 7 Cents.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Bloomberg CEO Credited With Key Role in Foiling Alleged Cyber Extortion.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\F I R E S.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Chances of Rescue 'Not Good' for Russian Nuke Sub Stuck on Ocean Floor.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\FOXSports.com - Tennis - Wimbledon 2000 - Venus Williams beats sister Serena in Wimbledon semis.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Hollow Man, Space Cowboy Tie For Top of Box-Office.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Tropical Storm Beryl Threatens Texas, Mexico.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\The NTSB Narrative of the Crash.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Buchanan Bigots Need Not Apply.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\The Final Report.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Judge Sauls' Decision.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Longtime Clinton Associate Faces Indictment on Campaign-Finance Charges.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2000\Most Troubled Tires Came From Illinois Plant, Ford Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\Profile Judge Priscilla Owen.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - Indecent Proposal Expanding FCC Speech Controls to Cable.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - EPA Rules Raise Gas Prices.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Mercury Rises Over EPA Pollution Rules.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Politics - Airspace Violation Interrupts Senate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\Quotes Senators Discuss Deal.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Politics - Roberts Down to Business on First Day.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Charting Oil-for-Food's Trail in the U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - The Cost of Freedom - The Cost of Freedom Recap - Recap of Saturday, September 17.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Politics - Rehnquist at D.C. Medical Dept..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Politics - Staffers Had Rolled Out the Cots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - 'Robo-Soldier' Ready to Report for Duty - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Police Remains Found Are Missing Va. Student's - U.S. & World.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Crews Focus on Brush in Calif. Firefight.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Vaccine Disease Protections Outweigh Side Effects.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Junk Science - Study Disproves Antibacterial Soap Scare.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - U.S. Must Take Firm Stand on Iran.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Politics - Bush Picks Miers for Supreme Court.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Politics - Signs Speak as Loud as Words.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 3 4 05 FOX Poll Names in the News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - On the Record w Greta - GretaWire - Operation Falcon.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - War Games Russia, China Grow Alliance.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - FOX News Polls - FNC Poll - 10 05 05 FOX Poll Rudy in 2008 Giuliani Tops 'Strong Leader' List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - U.S. & World - Lower Fla. Keys Ordered Evacuated.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2005\FOXNews.com - Views - Heritage Foundation - America's Self-Imposed Energy Shortage.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\6 Earthquakes Rock Solomon Islands - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - CIA Base Bomber Was Jordanian Extremist Double Agent.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Intel Chief Calls For Changes After Attack � Liveshots.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Slovak Man Takes Hidden Explosive on Dublin Flight - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Family, Friends Launch Online Campaign to Find Missing Utah Mom - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Departure of Dorgan Unsettles Senate Democrats.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Hubble Takes Earliest Snapshot of Infant Universe.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Obama to Meet With Security Officials, Unveil Reforms.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Las Vegas Gunman Angry at U.S. Government, Police Say - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Google Unveils 'Super Phone' Nexus One, Online Cell Store.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Iranian Lawmakers Reject Kerry Request to Visit.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Dozens of Names Shifted to No-Fly List.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Report Freed Guantanamo Detainees Head to Yemen to Rejoin Al Qaeda - International News News of the World Middle East News Europe New.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Suspect in N.H. Machete Attack Regrets Girl Survived - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Delays at Two Airports Thanks to Suspicious Luggage - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Fla. Man Calls Police After Pot Plants Stolen - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Obama U.S. Intelligence Should Have 'Uncovered' Christmas Day Plot.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Bomber Fooled CIA, Family, Jordanian Intelligence - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Canada to Use Full-Body Scanners in Airports - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\TSA Allegedly Waited 80 Minutes Before Reporting Breach at N.J. Airport - Local News News Articles National News US News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\United Airlines Pilot Charged With Being Over Alcohol Limit - International News News of the World Middle East News Europe News - FOXN.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - U.S. Feared Spectacular Terror Attack at Obama's Inauguration.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\Dubai Opens World's Tallest Building - International News News of the World Middle East News Europe News - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\FOXNews.com - Scientists Dolphins Should Be Treated as 'Non-Human Persons'.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2010\U.S. Embassy in Yemen Reopens After Threat - Middle East Map News Crisis - FOXNews.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\State of the Union Obama proposals show president on �offense� despite GOP wave Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Purported cartel hit man arrested in disappearance of 43 Mexican students Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Today is 'saddest day of the year' -- and there's a 'Blue Monday' equation that 'explains' why Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\By 2050, cancer will hardly kill anyone under 80, researchers say Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Faith, community leaders, family gather in Atlanta to celebrate Martin Luther King Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\2015 Ford Shelby GT Debuts with 625 Horsepower Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\ISIS leader orders failed fighters executed, says report Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Stanford students to see what admissions officers wrote on their applications Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Glut is expected to drive down the price of milk Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Man dies in 1,000-foot fall from Alaska mountain Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Dad gets 'no-show' bill after son, 5, misses birthday party Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Colorado lawmakers look to ban asset seizures without convictions Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Oklahoma man at center of police shooting said he never made 911 call that led to raid Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Lebanon-born porn star draws fans, death threats after performing in hijab Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Official Cuba terror listing won�t affect US push for embassy, diplomatic ties Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\'Selma' sets off new fight over Martin Luther King's contested legacy Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Report Harry Reid 'most vulnerable' in 2016, may retire Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Study used to bolster NY fracking ban developed by anti-fracking 'activists' Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Ben & Jerry�s jumps on the cookie butter trend with new ice cream flavors Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Bible story Doubts raised over a Texas inaugural tradition Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Cecilia Abbott poised to become Texas� first Latina First Lady Fox News Latino.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Pope expected to visit Philadelphia, White House, UN on September US trip Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Military continues development of stealth hybrid motorcycle Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\America pays tribute to Martin Luther King as events held across the nation Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Officials No others involved in Argentine special prosecutor death Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.foxnews.com\2015\Kentucky man who led 'Bonnie and Clyde' spree agrees to go home and face charges Fox News.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Volcano Towns' Future Shrouded in Uncertainty.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20001110010200_www.latimes.com_sports_updates_lakers_lat_lakers001109.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Jury Awards Smokers Millions for Ills Caused by Cigarettes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000815052707_www.latimes.com_business_20000814_t000076119.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_seats000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_nation_updates_lat_sub000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20001110010200_www.latimes.com_travel_california_lat_oakland001105.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000407195301_www.latimes.com_business_updates_lat_janitor000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_politics_elect2000_pres_lat_tension000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_mozambique000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20001110010200_www.calendarlive.com_top_1,1419,L_LATimes_Restaurants_X!PlaceDetail_35986,00.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Experts Fret Over Effect of Gene Patents on Research.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Anti-Flag Trek Ends; Banner Yet Waves.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Janitors' Quest Complicated by Shifting Nature of the Job.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\For Web Crowd, Stock Market's No Party Pooper.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Quackenbush Let Insurer Avoid State Investigation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_state_reports_youth_lat_kidcity000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_dems000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000407195301_www.latimes.com_living_lat_ramos000407.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_politics_elect2000_pres_demconven_upd_protest000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Makers of Smart Cards Are Betting Big on U.S..txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000815052707_www.latimes.com_news_state_updates_lat_costco000814.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Elian to Be Returned to Father Next Week, Reno Says.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\20000229101800_www.latimes.com_news_nation_updates_lat_monarch000229.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2000\Ivory Stash Driving Debate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050616023919_www_latimes_com_classified_realestate_hotprop_la_re_hotprop12jun12,0,7414036_story_coll_la_home_realestate.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_nationworld_wire_ats_ap_top14jun15,0,2017011_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\Digging Into Seymour Hersh.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_commentary_la_oe_debellaigue15jun15,0,974217_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_la_ed_fedmay1mar01,0,7488388_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_outdoors_la_os_wildflowers1mar01,0,2946111_story_coll_la_home_outdoors.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\Los Angeles Times - Postcards from Paris.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_oe_morrison2mar02,0,3036572_column_coll_la_home_utilities.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_iraq_la_fg_tapes2mar02,0,2971270_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_politics_la_me_hertzlaw2mar02,0,6813645_story_coll_la_home_politics.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_me_disney2mar02,0,5119900_story_coll_la_home_headlines.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_calcook2mar02,0,7727608_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\Sex, Politics and President Hillary.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_nation_la_na_chicago2mar02,0,531573_story_coll_la_home_nation.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_features_food_la_fo_mildredpierce2mar02,0,7398991_story_coll_la_home_food.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_opinion_commentary_la_oe_bandow2mar02,0,4787970_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\Loss Strengthens Brothers' Bond.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\Judge Upholds S_D_ Mayoral Election.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\George Bush Talks Big, and He Delivers.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_local_la_fi_kit2mar02,0,5314050_story_coll_la_home_local.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050616023919_www_latimes_com_news_opinion_sunday_commentary_la_op_faith12jun12,0,6389562_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_jews2mar02,0,5443257_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_world_la_fg_ushaiti1mar01,0,3978550_story_coll_la_home_world.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\Shiite Leads Iraq Vote; 3 Marines Killed.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2005\20050302091936_www_latimes_com_news_nationworld_wire_ats_ap_top19mar02,0,181991_story.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Would-be LAX terrorist's prison sentence overturned as too lenient - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\AIG begins awarding abbreviated bonuses - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\FDA scolds maker of Tylenol for foot-dragging in recall - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\U.S. is coming to Haiti's aid as fast as it can, Pentagon officials say - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\China's investments in U.S. up sharply - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Obama calls for action on healthcare - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\To combat piracy, UCLA reaches for the Clicker Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Gates makes recommendations in Ft. Hood shooting case - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Bias raises gays' risk of mental disorders, witness at Prop. 8 trial says - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Quakes in Chile and Haiti raise concerns about Southland preparedness - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Education should accompany prostate screening, new guidelines say - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Woman killed by hit-and-run driver in Harbor Gateway neighborhood L.A. NOW Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Nigerian president's health remains a mystery - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\White House considers D.C. trial for Guantanamo detainee - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Aid surge gets going in Haiti - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\In Illinois, it's Kirk (R) vs Giannoulias (D) for Obama's former Senate seat Top of the Ticket Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Drug maker Johnson & Johnson paid kickbacks to mega-pharmacy, U.S. charges in civil complaint D.C. Now Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Suspect in plane bomb plot is reportedly cooperating again with authorities - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\California Medical Board admits allowing troubled doctor to monitor another - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\FDA issues warnings on food labels - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Cross found at Air Force Academy's Wicca center - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Among the gated enclaves, anger and fear over Chelsea King's killing - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Sept. 11 hijackers' Florida apartment building to be razed - latimes.com.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Rep. Charles Rangel temporarily steps down as Ways and Means chairman - Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2010\Apple has a flickering fix for the 27-inch iMac Technology Los Angeles Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Placentia teacher charged with molestation; prosecutors seek other victims - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Yelp names BBQ spot in Big Pine, CA the best restaurant in the country - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\California lawmakers heading to policy retreats to prepare for year - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Israel's spy agency denies opposing new Iran sanctions - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Obama won't meet with Netanyahu during U.S. visit, White House says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\3 arrested, drugs, military-style weapons seized in standoff - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\City National, L.A.'s 'bank to the stars,' sold for $5.4 billion - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Are drug-carrying drones the future of smuggling Probably not, DEA says - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Dodger Stadium attacker to plead guilty to federal weapons charge - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Why Will Ferrell hit a cheerleader in the head with a basketball - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\GOP operative plans anti-Steyer effort - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Ex-state Sen. Calderon seeks another delay in public corruption trial - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Jury acquits skin-care guru accused of plot against rival - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\DreamWorks Animation to cut 500 jobs - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Glendale mental health center named for L.A. County Supervisor Antonovich - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Police appear to try to tip man out of his wheelchair in video - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Billionaire Tom Steyer eyes climate change, education in potential Senate bid - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\U.S. judge rejects overtime for home health aides - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Tom Steyer's exit shifts Senate race focus to Antonio Villaraigosa - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\St. Louis officers fatally shoot man armed with loaded gun, police say - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Thousands of fiery red crabs wash ashore in Newport Beach - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Environmentalist Tom Steyer opts out of Senate race - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Boston bombing trial delayed as jury selection bogs down - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\Kobe Bryant suffers torn rotator cuff in right shoulder - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.latimes.com\2015\India court orders activist on 14-year hunger strike freed - LA Times.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\gotham External Affairs by JOEL ROSENBLATT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\movies In Brief Judy Berlin by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\intelligencer March 6, 2000 by BETH LANDMAN KEIL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\cityside The Laptop Nomads by MARK JACOBSON (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\features Poisoned at the Source by DAVID D. KIRKPATRICK (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\cover story Silicon Alley 10003 by VANESSA GRIGORIADIS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\cover story Best Bets by CORKY POLLAN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\classical music Some Like It Tepid by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\features Song And Dance by MICHAEL TOMASKY (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\smart city Smooth Moves by ROSE-ANNE CLERMONT (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\movies Man in the Muddle by PETER RAINER (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\cover story Inside the Happiness Business by DAVID D. KIRKPATRICK (05 15 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\media Queen for a Day by MICHAEL WOLFF (03 06 00) 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\gotham style The jean jacket, reconsidered (05 15 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\theater Love Canal by JOHN SIMON (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\pop music Bold Ambition by ETHAN BROWN (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\dance Roots by TOBI TOBIAS (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\the city politic Screening Rooms by PEGGY EDERSHEIM KALB (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2000\sales & bargains Snake Charmers by SHYAMA PATEL (03 06 00).txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\Brella Bar Opens on Elizabeth St. - Sleep Opens in Williamsburg - Katrina Parris Moves to Harlem - Plain Sud Closing.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\A Crime Rap Sheet on Clinton Street.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 6.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\I Am a Bird Now - New York Magazine Pop Music Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\New York's Latest Dating Fad.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\Co-op Boards and Brokers Cooperate - LeFrak City Expands to East 73rd St. - The Upper Upper West Side Price Difference.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\Remembering Architect Philip Johnson.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\Who Failed the C Train.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\Peter Hayn - New York Magazine Look Book.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\Onera - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 5.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Controversy Over the Harvey Milk School 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\The Two Blondes Behind Martha Stewart Living Omnimedia.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 4.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\Sukhadia's - New York Magazine Restaurant Review.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 2.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2005\CBS News Revolt -- The National Guard Fiasco -- Dan Rather, Martha Mapes 3.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Jersey Shore to Miami Clubs Give Us Your Dignity -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Finding Out About Merrill Lynch�s CDO Problem Was Like Getting Kicked in the Balls, Says Former CEO -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Will Apple Announce a Mass iPhone Recall Friday -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\AIG Chairman Steps Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Shoeshiner Arrested for Burning Stand Down Twice, Returns to Work Next Day -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Attorney General Candidate Eric Schneiderman Gets Attention for the Wrong Reasons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Easter to Blame for Unemployment -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Comptroller DiNapoli Is Dragged Into Hevesi State Pension-Fund Scandal -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Former NYU Chemistry Director Submitted Over $400K Worth of Fake Receipts -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Taliban�s Terrifying Army of Monkey Terrorists Effectively Ends War -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Is Banksy�s Mr. Brainwash an Art-World Borat -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Snooki to Meghan McCain Your Dad Is Hot -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Cuban Spy Gets Life in Jail -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Bet You Thought Rachel Maddow Always Defied Her Gender Norms -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Mort Zuckerman Is Not an Obama Speechwriter -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Editor & Publisher Names Journal CEO Publisher of the Year -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Obama Orders Hospital Visitation Rights for Same-Sex Partners -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Pope Breaks Silence Over Recent Church Scandals -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Movies on the TV on the Computer on... -- The Projectionist.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\See Sonic Youth�s Thurston Moore �Teaching� -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Manhattan Real Estate Market Reports Closings Up, Prices Down -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\U.S. Officials Knew About the Russian Spies for Ten Years -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Tea Party Express Releases Hit List -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Gaga for the Hamptons -- Daily Intel.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2010\Mike Bolt, Keeper of the Stanley Cup, on His White Gloves and Hanging Out With John Cusack -- The Sports Section.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Bradley Cooper Consumed 8,000 Calories a Day -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Emma Sulkowicz to Attend State of the Union -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\The Most-Coveted Titles at Idea Books� New Store -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Why Can�t Hollywood Get Women Journalists Right -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\What It�s Like to Date Your Dad -- Science of Us.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Can This Congressman Make America More Zen -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\How to Revive Your Summer Stripes in Winter -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Was That a Blow-Job Bib on Empire Last Night -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Sorry, Glassholes Google to Scrap Google Glass -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\It�s Now Okay to Sleep Through a Fashion Show -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\What It�s Like to Be One of �Karl�s Boys� -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\The Pope Doesn�t Want You Breeding Like Rabbits -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Guys, Drake Wants to Date a Writer -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\The Rise and Rise and Rise of ABC�s Ben Sherwood -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\53 Historians Weigh In on Barack Obama�s Legacy -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Isaac Mizrahi Does Not Know What the Sun Is -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Remember Glitter-Bombing Is Sparkly and Illegal -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\The Real, the Abstract The Work of Michael Mann -- Vulture.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Report Frida Giannini Departed Early From Gucci -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Kendall Jenner Reflects on Her Youth for WSJ -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\One Percent Have Almost Half the World�s Wealth -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\World Pissed That Women Just Want to Be Friends -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Objects With More Chemistry Than 50 Shades Stars -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\U.K. Spy Agency Collected Journalists� Emails -- NYMag.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Taylor Swift Wore Jodhpurs to Whole Foods -- The Cut.txt
c:\crawlToTheFuture\crawl-to-the-future\testing\wbce-tests\LinkQuotaFilter\www.nymag.com\2015\Solange Posted the Sweetest MLK Tribute -- The Cut.txt
[About 19810 more lines. Double-click to unfold]
>>> linkquotatest[0]
1: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'measurements': {'a': 0.8870168483647175,
'f1': 0.4622641509433962,
'p': 0.3202614379084967,
'r': 0.8305084745762712},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'measurements': {'a': 0.8997756170531039,
'f1': 0.8404761904761906,
'p': 0.7741228070175439,
'r': 0.9192708333333334},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'measurements': {'a': 0.9304979253112033,
'f1': 0.9330669330669331,
'p': 0.9192913385826772,
'r': 0.947261663286004},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.8876058506543495,
'f1': 0.8519269776876267,
'p': 0.7909604519774012,
'r': 0.9230769230769231},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'measurements': {'a': 0.8938181818181818,
'f1': 0.8089005235602095,
'p': 0.7322274881516587,
'r': 0.9035087719298246},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'measurements': {'a': 0.8930817610062893,
'f1': 0.8034682080924855,
'p': 0.7202072538860104,
'r': 0.9084967320261438},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'measurements': {'a': 0.8708771929824561,
'f1': 0.8491803278688524,
'p': 0.7719821162444114,
'r': 0.9435336976320583},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'measurements': {'a': 0.8369659982563208,
'f1': 0.6175869120654397,
'p': 0.4886731391585761,
'r': 0.8388888888888889},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8608695652173913,
'f1': 0.7743589743589743,
'p': 0.6817155756207675,
'r': 0.8961424332344213},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'measurements': {'a': 0.8429319371727748,
'f1': 0.6875000000000001,
'p': 0.5739130434782609,
'r': 0.8571428571428571},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'measurements': {'a': 0.8483455882352942,
'f1': 0.6405228758169934,
'p': 0.5176056338028169,
'r': 0.84},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'measurements': {'a': 0.8987012987012987,
'f1': 0.8276877761413844,
'p': 0.7473404255319149,
'r': 0.9273927392739274},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'measurements': {'a': 0.870575221238938,
'f1': 0.7719298245614035,
'p': 0.673469387755102,
'r': 0.9041095890410958},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'measurements': {'a': 0.892325996430696,
'f1': 0.8507831821929102,
'p': 0.8087774294670846,
'r': 0.8973913043478261},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.8891228070175439,
'f1': 0.8704918032786886,
'p': 0.8219814241486069,
'r': 0.9250871080139372},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'measurements': {'a': 0.8882480173035328,
'f1': 0.8651000870322019,
'p': 0.8094462540716613,
'r': 0.9289719626168225},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'measurements': {'a': 0.8646080760095012,
'f1': 0.7381316998468606,
'p': 0.6731843575418994,
'r': 0.8169491525423729},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'measurements': {'a': 0.9012016021361816,
'f1': 0.8435517970401691,
'p': 0.7823529411764706,
'r': 0.9151376146788991},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'measurements': {'a': 0.8988023952095808,
'f1': 0.8668242710795901,
'p': 0.8308157099697885,
'r': 0.9060955518945635},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'measurements': {'a': 0.8430629264594389,
'f1': 0.7389659520807061,
'p': 0.6369565217391304,
'r': 0.8798798798798799},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'measurements': {'a': 0.8364406779661017,
'f1': 0.6666666666666666,
'p': 0.5376044568245125,
'r': 0.8772727272727273},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'measurements': {'a': 0.9415675297410777,
'f1': 0.9534689328503761,
'p': 0.9406267179769104,
'r': 0.9666666666666667},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'measurements': {'a': 0.8780487804878049,
'f1': 0.8494711147274208,
'p': 0.786144578313253,
'r': 0.9238938053097345},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8459657701711492,
'f1': 0.7060653188180405,
'p': 0.58656330749354,
'r': 0.88671875},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 417 more lines. Double-click to unfold]
>>> linkquotatest[0]['20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp']
2: {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'}
[About 8 more lines. Double-click to unfold]
>>> part = {str(val['domain']+';'+val['year']):[] for domainyear in linkquotatest for key,val, in domainyear.items()}
>>> part['entertainment.msn.com;2000']
3: []
>>> for domainyear in linkquotatest:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
>>> part['entertainment.msn.com;2000']
4: [{'a': 0.8364406779661017,
'f1': 0.6666666666666666,
'p': 0.5376044568245125,
'r': 0.8772727272727273},
{'a': 0.8646080760095012,
'f1': 0.7381316998468606,
'p': 0.6731843575418994,
'r': 0.8169491525423729},
{'a': 0.8780487804878049,
'f1': 0.8494711147274208,
'p': 0.786144578313253,
'r': 0.9238938053097345},
{'a': 0.8459657701711492,
'f1': 0.7060653188180405,
'p': 0.58656330749354,
'r': 0.88671875},
{'a': 0.870575221238938,
'f1': 0.7719298245614035,
'p': 0.673469387755102,
'r': 0.9041095890410958},
{'a': 0.8930817610062893,
'f1': 0.8034682080924855,
'p': 0.7202072538860104,
'r': 0.9084967320261438},
{'a': 0.8891228070175439,
'f1': 0.8704918032786886,
'p': 0.8219814241486069,
'r': 0.9250871080139372},
{'a': 0.8369659982563208,
'f1': 0.6175869120654397,
'p': 0.4886731391585761,
'r': 0.8388888888888889},
{'a': 0.8608695652173913,
'f1': 0.7743589743589743,
'p': 0.6817155756207675,
'r': 0.8961424332344213},
{'a': 0.8708771929824561,
'f1': 0.8491803278688524,
'p': 0.7719821162444114,
'r': 0.9435336976320583},
{'a': 0.8987012987012987,
'f1': 0.8276877761413844,
'p': 0.7473404255319149,
'r': 0.9273927392739274},
{'a': 0.9304979253112033,
'f1': 0.9330669330669331,
'p': 0.9192913385826772,
'r': 0.947261663286004},
{'a': 0.8870168483647175,
'f1': 0.4622641509433962,
'p': 0.3202614379084967,
'r': 0.8305084745762712},
{'a': 0.9012016021361816,
'f1': 0.8435517970401691,
'p': 0.7823529411764706,
'r': 0.9151376146788991},
{'a': 0.892325996430696,
'f1': 0.8507831821929102,
'p': 0.8087774294670846,
'r': 0.8973913043478261},
{'a': 0.8938181818181818,
'f1': 0.8089005235602095,
'p': 0.7322274881516587,
'r': 0.9035087719298246},
{'a': 0.8429319371727748,
'f1': 0.6875000000000001,
'p': 0.5739130434782609,
'r': 0.8571428571428571},
{'a': 0.8483455882352942,
'f1': 0.6405228758169934,
'p': 0.5176056338028169,
'r': 0.84},
{'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
{'a': 0.8430629264594389,
'f1': 0.7389659520807061,
'p': 0.6369565217391304,
'r': 0.8798798798798799},
{'a': 0.8876058506543495,
'f1': 0.8519269776876267,
'p': 0.7909604519774012,
'r': 0.9230769230769231},
{'a': 0.8882480173035328,
'f1': 0.8651000870322019,
'p': 0.8094462540716613,
'r': 0.9289719626168225},
{'a': 0.8997756170531039,
'f1': 0.8404761904761906,
'p': 0.7741228070175439,
'r': 0.9192708333333334},
{'a': 0.8988023952095808,
'f1': 0.8668242710795901,
'p': 0.8308157099697885,
'r': 0.9060955518945635},
{'a': 0.9415675297410777,
'f1': 0.9534689328503761,
'p': 0.9406267179769104,
'r': 0.9666666666666667}]
[About 99 more lines. Double-click to unfold]
>>> listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
5: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter']
[About 8 more lines. Double-click to unfold]
>>> listsubdir(listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']))
6: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.nymag.com']
[About 115 more lines. Double-click to unfold]
def listsubdir(directory, test=os.path.isdir):
    """Return the joined paths of all entries in the given directories that pass *test*.

    directory -- an iterable of directory paths.  A single path passed as a
                 plain string is treated as a one-element list instead of
                 being iterated character by character.
    test      -- predicate called with each entry's joined path (parent
                 directory + entry name, NOT the bare entry name); entries
                 for which it returns a true value are kept.  Defaults to
                 os.path.isdir, i.e. only sub-directories are returned.

    Returns a list of joined paths, grouped by parent directory in the order
    the parents were given, with entries in os.listdir() order.  Any error
    raised by os.listdir() (e.g. a missing directory) propagates unchanged.
    """
    # (str, bytes, type(u'')) covers str/unicode on Python 2 and str/bytes on
    # Python 3, so the check works on either interpreter.
    if isinstance(directory, (str, bytes, type(u''))):
        directory = [directory]

    found = []
    for parent in directory:
        for entry in os.listdir(parent):
            # Build the joined path once; it is both tested and returned.
            path = os.path.join(parent, entry)
            if test(path):
                found.append(path)
    return found
>>> listsubdir(listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']))
7: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.nymag.com']
[About 115 more lines. Double-click to unfold]
>>> listsubdir(listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']), lambda x: x=="results.pkl")
8: []
>>> listsubdir(listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']), lambda x: os.path.split(x)[-1][0]=="results.pkl")
9: []
>>> listsubdir(listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']))
10: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\www.nymag.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\news.bbc.co.uk',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\news.yahoo.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\thenation.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.cnn.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.esquire.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.forbes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.foxnews.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.latimes.com',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\www.nymag.com']
[About 115 more lines. Double-click to unfold]
>>> [f for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']) if f == "results.pkl"]
11: []
>>> [filename for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']) for filename in os.listdir(f) if filename == "results.pkl"]
12: ['results.pkl',
'results.pkl',
'results.pkl',
'results.pkl',
'results.pkl',
'results.pkl',
'results.pkl',
'results.pkl',
'results.pkl']
[About 8 more lines. Double-click to unfold]
>>> packages = [os.path.join(f,filename) for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']) for filename in os.listdir(f) if filename == "results.pkl"]
>>> packages[0]
13: 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\results.pkl'
>>> linkquotatest[0]
14: {'20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'measurements': {'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_Especial_1013_bratpack_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'measurements': {'a': 0.8870168483647175,
'f1': 0.4622641509433962,
'p': 0.3202614379084967,
'r': 0.8305084745762712},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_1013_fashion_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'measurements': {'a': 0.8997756170531039,
'f1': 0.8404761904761906,
'p': 0.7741228070175439,
'r': 0.9192708333333334},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_celebs_eonline_r_awfultruth_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_movies_video_video_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'measurements': {'a': 0.9304979253112033,
'f1': 0.9330669330669331,
'p': 0.9192913385826772,
'r': 0.947261663286004},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_movies_video_video_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.8876058506543495,
'f1': 0.8519269776876267,
'p': 0.7909604519774012,
'r': 0.9230769230769231},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'measurements': {'a': 0.8938181818181818,
'f1': 0.8089005235602095,
'p': 0.7322274881516587,
'r': 0.9035087719298246},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_1018_dlachey_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'measurements': {'a': 0.8930817610062893,
'f1': 0.8034682080924855,
'p': 0.7202072538860104,
'r': 0.9084967320261438},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_news_eonline_DottedLine_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'measurements': {'a': 0.8708771929824561,
'f1': 0.8491803278688524,
'p': 0.7719821162444114,
'r': 0.9435336976320583},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_feature2_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'measurements': {'a': 0.8369659982563208,
'f1': 0.6175869120654397,
'p': 0.4886731391585761,
'r': 0.8388888888888889},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed01_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8608695652173913,
'f1': 0.7743589743589743,
'p': 0.6817155756207675,
'r': 0.8961424332344213},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'measurements': {'a': 0.8429319371727748,
'f1': 0.6875000000000001,
'p': 0.5739130434782609,
'r': 0.8571428571428571},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed03_asp.html',
'year': '2000'},
'20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'measurements': {'a': 0.8483455882352942,
'f1': 0.6405228758169934,
'p': 0.5176056338028169,
'r': 0.84},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001018072019_www_entertainment_msn_com_tv_gist_wed04_asp.html',
'year': '2000'},
'20001110075800_entertainment_citysearch': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'measurements': {'a': 0.8987012987012987,
'f1': 0.8276877761413844,
'p': 0.7473404255319149,
'r': 0.9273927392739274},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch.html',
'year': '2000'},
'20001110075800_entertainment_citysearch_html': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'measurements': {'a': 0.870575221238938,
'f1': 0.7719298245614035,
'p': 0.673469387755102,
'r': 0.9041095890410958},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_entertainment_citysearch_html.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'measurements': {'a': 0.892325996430696,
'f1': 0.8507831821929102,
'p': 0.8087774294670846,
'r': 0.8973913043478261},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_celebs_eonline_i_1110_asandler_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_Topcds_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'measurements': {'a': 0.8891228070175439,
'f1': 0.8704918032786886,
'p': 0.8219814241486069,
'r': 0.9250871080139372},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_Topcds_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_features_feature3_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'measurements': {'a': 0.8882480173035328,
'f1': 0.8651000870322019,
'p': 0.8094462540716613,
'r': 0.9289719626168225},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_features_feature3_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_music_news_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'measurements': {'a': 0.8646080760095012,
'f1': 0.7381316998468606,
'p': 0.6731843575418994,
'r': 0.8169491525423729},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_music_news_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'measurements': {'a': 0.9012016021361816,
'f1': 0.8435517970401691,
'p': 0.7823529411764706,
'r': 0.9151376146788991},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_dragon_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'measurements': {'a': 0.8988023952095808,
'f1': 0.8668242710795901,
'p': 0.8308157099697885,
'r': 0.9060955518945635},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_news_eonline_1110_spears_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'measurements': {'a': 0.8430629264594389,
'f1': 0.7389659520807061,
'p': 0.6369565217391304,
'r': 0.8798798798798799},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri01_asp.html',
'year': '2000'},
'20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'measurements': {'a': 0.8364406779661017,
'f1': 0.6666666666666666,
'p': 0.5376044568245125,
'r': 0.8772727272727273},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001110075800_www_entertainment_msn_com_tv_gist_fri02_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_holiday_tv_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'measurements': {'a': 0.9415675297410777,
'f1': 0.9534689328503761,
'p': 0.9406267179769104,
'r': 0.9666666666666667},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_holiday_tv_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_music_news_wed01_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'measurements': {'a': 0.8780487804878049,
'f1': 0.8494711147274208,
'p': 0.786144578313253,
'r': 0.9238938053097345},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_music_news_wed01_asp.html',
'year': '2000'},
'20001206091000_entertainment_msn_com_tv_gist_wed02_asp': {'domain': 'entertainment.msn.com',
'goldpath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'measurements': {'a': 0.8459657701711492,
'f1': 0.7060653188180405,
'p': 0.58656330749354,
'r': 0.88671875},
'testpath': 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.txt',
'universepath': 'c:/crawlToTheFuture/crawl-to-the-future/dataset/entertainment.msn.com\\2000\\20001206091000_entertainment_msn_com_tv_gist_wed02_asp.html',
'year': '2000'}}
[About 417 more lines. Double-click to unfold]
>>> def trim_results(domain_results):
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
[About 5 more lines. Double-click to unfold]
>>> def trim_results(domain_results):
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... return part
[About 7 more lines. Double-click to unfold]
>>> trim_results(pickle.load(open(packages[0])))
15: {'entertainment.msn.com;2000': [{'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
{'a': 0.9224137931034483,
'f1': 0.8266199649737302,
'p': 0.8973384030418251,
'r': 0.7662337662337663},
{'a': 0.933852140077821,
'f1': 0.9098939929328622,
'p': 0.9501845018450185,
'r': 0.8728813559322034},
{'a': 0.8989405052974735,
'f1': 0.7847222222222223,
'p': 0.7174603174603175,
'r': 0.8659003831417624},
{'a': 0.927536231884058,
'f1': 0.8558758314855874,
'p': 0.8577777777777778,
'r': 0.8539823008849557},
{'a': 0.9419152276295133,
'f1': 0.87987012987013,
'p': 0.9093959731543624,
'r': 0.8522012578616353},
{'a': 0.9335180055401662,
'f1': 0.916083916083916,
'p': 0.9509981851179673,
'r': 0.8836424957841484},
{'a': 0.8910472972972973,
'f1': 0.7425149700598803,
'p': 0.6813186813186813,
'r': 0.8157894736842105},
{'a': 0.894695170229612,
'f1': 0.8129395218002814,
'p': 0.774798927613941,
'r': 0.8550295857988166},
{'a': 0.8803122289679098,
'f1': 0.676056338028169,
'p': 0.6075949367088608,
'r': 0.7619047619047619},
{'a': 0.9037656903765691,
'f1': 0.8804159445407279,
'p': 0.8581081081081081,
'r': 0.9039145907473309},
{'a': 0.9509632224168126,
'f1': 0.9087947882736157,
'p': 0.9,
'r': 0.9177631578947368},
{'a': 0.9333680374804789,
'f1': 0.9330543933054394,
'p': 0.970620239390642,
'r': 0.8982880161127895},
{'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
{'a': 0.9472981987991995,
'f1': 0.9090909090909091,
'p': 0.9360189573459715,
'r': 0.883668903803132},
{'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
{'a': 0.9454148471615721,
'f1': 0.8898678414096917,
'p': 0.9181818181818182,
'r': 0.8632478632478633},
{'a': 0.8812227074235808,
'f1': 0.7301587301587301,
'p': 0.6789667896678967,
'r': 0.7896995708154506},
{'a': 0.8959484346224678,
'f1': 0.7064935064935064,
'p': 0.6507177033492823,
'r': 0.7727272727272727},
{'a': 0.889644746787604,
'f1': 0.7972222222222222,
'p': 0.7572559366754618,
'r': 0.841642228739003},
{'a': 0.9303030303030303,
'f1': 0.8993435448577681,
'p': 0.9383561643835616,
'r': 0.8634453781512605},
{'a': 0.9379900213827512,
'f1': 0.9186155285313378,
'p': 0.9478764478764479,
'r': 0.8911070780399274},
{'a': 0.9497374343585896,
'f1': 0.9130998702983139,
'p': 0.9263157894736842,
'r': 0.9002557544757033},
{'a': 0.9401555954518253,
'f1': 0.9159663865546219,
'p': 0.9527972027972028,
'r': 0.8818770226537217},
{'a': 0.9470013947001394,
'f1': 0.9573273441886581,
'p': 0.9567901234567902,
'r': 0.9578651685393258}],
'entertainment.msn.com;2005': [{'a': 0.6906686260102866,
'f1': 0.6982078853046595,
'p': 0.5553021664766249,
'r': 0.9401544401544402},
{'a': 0.5468451242829828,
'f1': 0.21000000000000002,
'p': 0.12401574803149606,
'r': 0.6847826086956522},
{'a': 0.49862825788751713,
'f1': 0.23455497382198953,
'p': 0.14267515923566879,
'r': 0.6588235294117647},
{'a': 0.6598006644518273,
'f1': 0.580327868852459,
'p': 0.4270205066344994,
'r': 0.9053708439897699},
{'a': 0.6897289586305279,
'f1': 0.7030716723549487,
'p': 0.5640744797371303,
'r': 0.9329710144927537},
{'a': 0.6415343915343915,
'f1': 0.5893939393939394,
'p': 0.44457142857142856,
'r': 0.8741573033707866},
{'a': 0.6283185840707964,
'f1': 0.5948553054662379,
'p': 0.44417767106842737,
'r': 0.9002433090024331},
{'a': 0.7079343365253078,
'f1': 0.7292327203551046,
'p': 0.5958549222797928,
'r': 0.9395424836601307},
{'a': 0.6803051317614425,
'f1': 0.6069906223358909,
'p': 0.4587628865979381,
'r': 0.8967254408060453},
{'a': 0.8139684583199227,
'f1': 0.8720106288751107,
'p': 0.7942718838241226,
'r': 0.9666175748649976},
{'a': 0.7660757733750434,
'f1': 0.8402563493947306,
'p': 0.7378074197582326,
'r': 0.9757442116868799},
{'a': 0.6724023825281271,
'f1': 0.6583850931677019,
'p': 0.5107066381156317,
'r': 0.9262135922330097},
{'a': 0.6503153468815698,
'f1': 0.6453447050461976,
'p': 0.49780701754385964,
'r': 0.9171717171717172},
{'a': 0.7622252131000449,
'f1': 0.8039940828402367,
'p': 0.7003865979381443,
'r': 0.9435763888888888},
{'a': 0.5560640732265446,
'f1': 0.4203187250996016,
'p': 0.2779973649538867,
'r': 0.8612244897959184},
{'a': 0.6134852801519468,
'f1': 0.36899224806201547,
'p': 0.2389558232931727,
'r': 0.8095238095238095},
{'a': 0.6816109422492401,
'f1': 0.6823351023502654,
'p': 0.5357142857142857,
'r': 0.9394572025052192},
{'a': 0.5220125786163522,
'f1': 0.27790973871733965,
'p': 0.16931982633863965,
'r': 0.7748344370860927},
{'a': 0.6473509933774835,
'f1': 0.5988700564971752,
'p': 0.4428969359331476,
'r': 0.9244186046511628},
{'a': 0.662015503875969,
'f1': 0.6466774716369531,
'p': 0.4962686567164179,
'r': 0.9279069767441861},
{'a': 0.5902621722846442,
'f1': 0.3572267920094007,
'p': 0.22926093514328807,
'r': 0.8085106382978723},
{'a': 0.5920763022743947,
'f1': 0.540495867768595,
'p': 0.38698224852071006,
'r': 0.8958904109589041},
{'a': 0.6629581151832461,
'f1': 0.57190357439734,
'p': 0.4226044226044226,
'r': 0.884318766066838},
{'a': 0.7147041593438781,
'f1': 0.7240793201133144,
'p': 0.5889400921658986,
'r': 0.9397058823529412},
{'a': 0.6313432835820896,
'f1': 0.6085578446909667,
'p': 0.4544378698224852,
'r': 0.920863309352518},
{'a': 0.5412639405204461,
'f1': 0.25392986698911735,
'p': 0.15306122448979592,
'r': 0.7446808510638298},
{'a': 0.6824005394470668,
'f1': 0.6713189113747383,
'p': 0.5320796460176991,
'r': 0.9092627599243857}],
'entertainment.msn.com;2010': [{'a': 0.3428857047650783,
'f1': 0.1658206429780034,
'p': 0.09201877934272301,
'r': 0.8376068376068376},
{'a': 0.45119947420308903,
'f1': 0.42013888888888884,
'p': 0.2742520398912058,
'r': 0.8976261127596439},
{'a': 0.6411235342241615,
'f1': 0.5517711171662125,
'p': 0.4136874361593463,
'r': 0.8282208588957055},
{'a': 0.9039820166987798,
'f1': 0.8546426835196889,
'p': 0.8108856088560885,
'r': 0.9033915724563206},
{'a': 0.3921737260804128,
'f1': 0.34117921230482406,
'p': 0.20938215102974828,
'r': 0.9207547169811321},
{'a': 0.5535641547861507,
'f1': 0.3234567901234568,
'p': 0.19969512195121952,
'r': 0.8506493506493507},
{'a': 0.14695238095238095,
'f1': 0.10955363356198429,
'p': 0.058325394305070395,
'r': 0.9003267973856209},
{'a': 0.7608596250571559,
'f1': 0.6565988181221274,
'p': 0.5488474204171241,
'r': 0.8169934640522876},
{'a': 0.884125920964501,
'f1': 0.7920673076923077,
'p': 0.7289823008849557,
'r': 0.8671052631578947},
{'a': 0.896735273243435,
'f1': 0.8186915887850468,
'p': 0.7595375722543353,
'r': 0.8878378378378379},
{'a': 0.7920924033762772,
'f1': 0.7240566037735848,
'p': 0.6220871327254306,
'r': 0.8660084626234132},
{'a': 0.36601513240857503,
'f1': 0.25046589638464406,
'p': 0.14608695652173914,
'r': 0.8772845953002611},
{'a': 0.1811268579329416,
'f1': 0.1899469994870918,
'p': 0.10574909575480677,
'r': 0.9320469798657718},
{'a': 0.41000352236703064,
'f1': 0.32595573440643866,
'p': 0.20009881422924902,
'r': 0.8785249457700651},
{'a': 0.7624944714727997,
'f1': 0.6670799752014879,
'p': 0.5563598759048604,
'r': 0.8328173374613003},
{'a': 0.4393613754989254,
'f1': 0.42506297229219153,
'p': 0.27439024390243905,
'r': 0.9427374301675978},
{'a': 0.8097795364612775,
'f1': 0.837321730722746,
'p': 0.7646799116997792,
'r': 0.9252136752136753},
{'a': 0.13969136253931105,
'f1': 0.13577253691866875,
'p': 0.07321131447587355,
'r': 0.9333333333333333},
{'a': 0.6126237623762376,
'f1': 0.44503546099290786,
'p': 0.2977461447212337,
'r': 0.8807017543859649},
{'a': 0.7682926829268293,
'f1': 0.7115384615384617,
'p': 0.6161262050832603,
'r': 0.8419161676646707},
{'a': 0.6949648711943794,
'f1': 0.7540132200188858,
'p': 0.6240719030871434,
'r': 0.9522957662492546},
{'a': 0.4694069657985566,
'f1': 0.4663931839697065,
'p': 0.3124735729386892,
'r': 0.9191542288557214},
{'a': 0.789193302891933,
'f1': 0.7656514382402708,
'p': 0.6830188679245283,
'r': 0.8710298363811357},
{'a': 0.3921901528013582,
'f1': 0.32634408602150533,
'p': 0.19927774130006565,
'r': 0.900593471810089},
{'a': 0.7479967948717948,
'f1': 0.6831234256926951,
'p': 0.5809768637532133,
'r': 0.8288508557457213}],
'entertainment.msn.com;2015': [{'a': 0.5922783603431839,
'f1': 0.30057236304170076,
'p': 0.18274010737721216,
'r': 0.8462246777163904},
{'a': 0.5627964528768818,
'f1': 0.08620689655172414,
'p': 0.045599635202918376,
'r': 0.7874015748031497},
{'a': 0.5792236086353734,
'f1': 0.20978240654640132,
'p': 0.12028150991682661,
'r': 0.8197674418604651},
{'a': 0.5620437956204379,
'f1': 0.08229211546747094,
'p': 0.0435278030993619,
'r': 0.7519685039370079},
{'a': 0.5617125883437468,
'f1': 0.08782775527606056,
'p': 0.046627433227704844,
'r': 0.7545787545787546},
{'a': 0.5577628361858191,
'f1': 0.08821676118462508,
'p': 0.04678102027177545,
'r': 0.7720588235294118},
{'a': 0.49898887765419614,
'f1': 0.17462520821765687,
'p': 0.09741365959423881,
'r': 0.8420348058902276},
{'a': 0.5571502323180175,
'f1': 0.06700021753317381,
'p': 0.03503184713375796,
'r': 0.7661691542288557},
{'a': 0.5541204819277108,
'f1': 0.11345343043311613,
'p': 0.060980634528224144,
'r': 0.8131868131868132},
{'a': 0.5564376590330788,
'f1': 0.07825719120135363,
'p': 0.04120267260579064,
'r': 0.7773109243697479},
{'a': 0.4916342588405535,
'f1': 0.0852725793327909,
'p': 0.04492455418381344,
'r': 0.8370607028753994},
{'a': 0.5661891699685055,
'f1': 0.12139917695473253,
'p': 0.06573083778966132,
'r': 0.793010752688172},
{'a': 0.5645177312009536,
'f1': 0.16622289844047167,
'p': 0.09192259150189314,
'r': 0.8670634920634921},
{'a': 0.5425956576769545,
'f1': 0.11157337367624812,
'p': 0.059995932479153954,
'r': 0.7951482479784366},
{'a': 0.5201313937118723,
'f1': 0.0501579045142114,
'p': 0.025921658986175114,
'r': 0.7714285714285715},
{'a': 0.5346083788706739,
'f1': 0.19119974675530232,
'p': 0.10803076372741907,
'r': 0.8308115543328748},
{'a': 0.5627691984452148,
'f1': 0.05063868613138687,
'p': 0.02616690240452617,
'r': 0.7816901408450704},
{'a': 0.5659126365054602,
'f1': 0.159682899207248,
'p': 0.08790523690773068,
'r': 0.8703703703703703},
{'a': 0.5280909612077203,
'f1': 0.09525554130793186,
'p': 0.05061319836480436,
'r': 0.8074534161490683},
{'a': 0.5116564417177915,
'f1': 0.16935002981514607,
'p': 0.09424257507881201,
'r': 0.8340675477239354},
{'a': 0.5793736501079914,
'f1': 0.3330479452054795,
'p': 0.20717202201313686,
'r': 0.8487272727272728},
{'a': 0.5667684090041969,
'f1': 0.1768756795940558,
'p': 0.09878542510121457,
'r': 0.8442906574394463},
{'a': 0.5547138908793121,
'f1': 0.05884898312418867,
'p': 0.030665163472378805,
'r': 0.7272727272727273},
{'a': 0.5545847039473685,
'f1': 0.05824820691154097,
'p': 0.030330466274332276,
'r': 0.73224043715847},
{'a': 0.5695238095238095,
'f1': 0.1908342284282134,
'p': 0.10769852495453627,
'r': 0.8367346938775511}],
'news.bbc.co.uk;2000': [{'a': 0.9118311981914092,
'f1': 0.8494208494208494,
'p': 0.88,
'r': 0.8208955223880597},
{'a': 0.9057377049180327,
'f1': 0.8410138248847927,
'p': 0.8837772397094431,
'r': 0.8021978021978022},
{'a': 0.9044198895027624,
'f1': 0.9006318207926479,
'p': 0.9223529411764706,
'r': 0.8799102132435466},
{'a': 0.9266358228684732,
'f1': 0.9125295508274233,
'p': 0.9429967426710097,
'r': 0.8839694656488549},
{'a': 0.9117997616209773,
'f1': 0.9022457067371201,
'p': 0.918010752688172,
'r': 0.887012987012987},
{'a': 0.8712029161603888,
'f1': 0.825944170771757,
'p': 0.8152350081037277,
'r': 0.8369384359400999},
{'a': 0.9092284417549168,
'f1': 0.8529411764705883,
'p': 0.8969072164948454,
'r': 0.8130841121495327},
{'a': 0.9022482893450635,
'f1': 0.8511904761904762,
'p': 0.8746177370030581,
'r': 0.8289855072463768},
{'a': 0.8823114869626497,
'f1': 0.7930607187112763,
'p': 0.8142493638676844,
'r': 0.7729468599033816},
{'a': 0.9267782426778243,
'f1': 0.9076517150395779,
'p': 0.9502762430939227,
'r': 0.8686868686868687},
{'a': 0.9225251076040172,
'f1': 0.8689320388349514,
'p': 0.9132653061224489,
'r': 0.8287037037037037},
{'a': 0.8396866840731071,
'f1': 0.8188790560471977,
'p': 0.7797752808988764,
'r': 0.8621118012422361},
{'a': 0.9097978227060654,
'f1': 0.8284023668639052,
'p': 0.8588957055214724,
'r': 0.8},
{'a': 0.8773255813953489,
'f1': 0.8451944240645635,
'p': 0.8458149779735683,
'r': 0.844574780058651},
{'a': 0.9183238636363636,
'f1': 0.8762109795479011,
'p': 0.9187358916478555,
'r': 0.8374485596707819},
{'a': 0.9077791718946048,
'f1': 0.8746803069053709,
'p': 0.9210053859964094,
'r': 0.8327922077922078},
{'a': 0.9012187299550994,
'f1': 0.8555347091932457,
'p': 0.9101796407185628,
'r': 0.8070796460176991},
{'a': 0.8915866741953699,
'f1': 0.8748370273794003,
'p': 0.8946666666666667,
'r': 0.8558673469387755},
{'a': 0.9106292966684294,
'f1': 0.9038133181559476,
'p': 0.928654970760234,
'r': 0.8802660753880266},
{'a': 0.8931178310740354,
'f1': 0.8686739269698911,
'p': 0.9125168236877523,
'r': 0.8288508557457213},
{'a': 0.8817005545286506,
'f1': 0.8502340093603743,
'p': 0.8596214511041009,
'r': 0.8410493827160493},
{'a': 0.9145496535796767,
'f1': 0.8948863636363636,
'p': 0.9402985074626866,
'r': 0.8536585365853658},
{'a': 0.9274905422446406,
'f1': 0.9117421335379894,
'p': 0.9565217391304348,
'r': 0.8709677419354839},
{'a': 0.8952116585704372,
'f1': 0.8422152560083594,
'p': 0.8448637316561844,
'r': 0.8395833333333333},
{'a': 0.9093789607097592,
'f1': 0.8686868686868686,
'p': 0.9148936170212766,
'r': 0.8269230769230769}],
'news.bbc.co.uk;2005': [{'a': 0.8391019644527596,
'f1': 0.7152317880794702,
'p': 0.6189111747851003,
'r': 0.8470588235294118},
{'a': 0.8142076502732241,
'f1': 0.7186761229314422,
'p': 0.6166328600405679,
'r': 0.8611898016997167},
{'a': 0.7044967880085653,
'f1': 0.7008670520231215,
'p': 0.5652680652680653,
'r': 0.9220532319391636},
{'a': 0.8440366972477065,
'f1': 0.8042226487523992,
'p': 0.7325174825174825,
'r': 0.8914893617021277},
{'a': 0.8418952618453865,
'f1': 0.8483978957436633,
'p': 0.8063636363636364,
'r': 0.8950554994954592},
{'a': 0.8062460165710643,
'f1': 0.8033635187580855,
'p': 0.711340206185567,
'r': 0.9227340267459139},
{'a': 0.9006181645268664,
'f1': 0.914238818219122,
'p': 0.9034874290348743,
'r': 0.925249169435216},
{'a': 0.82328190743338,
'f1': 0.7700729927007299,
'p': 0.6963696369636964,
'r': 0.8612244897959184},
{'a': 0.8155339805825242,
'f1': 0.7031250000000001,
'p': 0.6094808126410836,
'r': 0.8307692307692308},
{'a': 0.836343732895457,
'f1': 0.8280621046578494,
'p': 0.7903402854006586,
'r': 0.8695652173913043},
{'a': 0.8426966292134831,
'f1': 0.8038528896672504,
'p': 0.765,
'r': 0.8468634686346863},
{'a': 0.8250950570342205,
'f1': 0.7415730337078651,
'p': 0.6626506024096386,
'r': 0.8418367346938775},
{'a': 0.8407202216066482,
'f1': 0.789762340036563,
'p': 0.7728085867620751,
'r': 0.8074766355140187},
{'a': 0.8260200153964589,
'f1': 0.755939524838013,
'p': 0.6809338521400778,
'r': 0.8495145631067961},
{'a': 0.7973811164713991,
'f1': 0.6931106471816284,
'p': 0.5981981981981982,
'r': 0.8238213399503722},
{'a': 0.8313349320543565,
'f1': 0.7365792759051186,
'p': 0.6526548672566371,
'r': 0.8452722063037249},
{'a': 0.7385358004827032,
'f1': 0.6277205040091639,
'p': 0.4823943661971831,
'r': 0.898360655737705},
{'a': 0.7896613190730838,
'f1': 0.7941860465116279,
'p': 0.6905965621840243,
'r': 0.9343365253077975},
{'a': 0.6781193490054249,
'f1': 0.5180505415162455,
'p': 0.38523489932885907,
'r': 0.790633608815427},
{'a': 0.794679005205321,
'f1': 0.8104644954618259,
'p': 0.711340206185567,
'r': 0.9416873449131513},
{'a': 0.8725854383358098,
'f1': 0.8961550105964273,
'p': 0.8716136631330977,
'r': 0.9221183800623053},
{'a': 0.7995110024449877,
'f1': 0.638235294117647,
'p': 0.5331695331695332,
'r': 0.7948717948717948},
{'a': 0.8473439917483239,
'f1': 0.8435517970401691,
'p': 0.8093306288032455,
'r': 0.8807947019867549},
{'a': 0.709572742022715,
'f1': 0.6546623794212219,
'p': 0.5146612740141557,
'r': 0.8992932862190812},
{'a': 0.8297376093294461,
'f1': 0.8027027027027026,
'p': 0.7342398022249691,
'r': 0.8852459016393442}],
'news.bbc.co.uk;2010': [{'a': 0.7625243981782693,
'f1': 0.7058823529411765,
'p': 0.6311239193083573,
'r': 0.8007312614259597},
{'a': 0.7582283624542869,
'f1': 0.5697758496023138,
'p': 0.4586728754365541,
'r': 0.7519083969465649},
{'a': 0.7143962848297214,
'f1': 0.45089285714285715,
'p': 0.33554817275747506,
'r': 0.6870748299319728},
{'a': 0.706855791962175,
'f1': 0.5267175572519084,
'p': 0.3942857142857143,
'r': 0.7931034482758621},
{'a': 0.7664473684210527,
'f1': 0.32746955345060896,
'p': 0.21530249110320285,
'r': 0.6836158192090396},
{'a': 0.7461730153079388,
'f1': 0.6067291781577496,
'p': 0.5018248175182481,
'r': 0.7670850767085077},
{'a': 0.6400894187779433,
'f1': 0.02424242424242424,
'p': 0.013729977116704805,
'r': 0.10344827586206896},
{'a': 0.7614255765199162,
'f1': 0.4878487848784878,
'p': 0.3702185792349727,
'r': 0.7150395778364116},
{'a': 0.7620689655172413,
'f1': 0.463035019455253,
'p': 0.3380681818181818,
'r': 0.7345679012345679},
{'a': 0.7661676646706587,
'f1': 0.7279693486590039,
'p': 0.6626506024096386,
'r': 0.8075734157650696},
{'a': 0.7575757575757576,
'f1': 0.456,
'p': 0.33727810650887574,
'r': 0.7037037037037037},
{'a': 0.7598484848484849,
'f1': 0.6313953488372093,
'p': 0.5386904761904762,
'r': 0.7626404494382022},
{'a': 0.754180602006689,
'f1': 0.48148148148148145,
'p': 0.3684210526315789,
'r': 0.6946564885496184},
{'a': 0.7579535683576956,
'f1': 0.49141824751580854,
'p': 0.37006802721088433,
'r': 0.7311827956989247},
{'a': 0.7263339070567987,
'f1': 0.29646017699115046,
'p': 0.18457300275482094,
'r': 0.7528089887640449},
{'a': 0.7547416612164813,
'f1': 0.6553308823529411,
'p': 0.5882838283828383,
'r': 0.7396265560165975},
{'a': 0.7636441770519983,
'f1': 0.5454545454545453,
'p': 0.4263565891472868,
'r': 0.7568807339449541},
{'a': 0.6528982992016661,
'f1': 0.4959677419354839,
'p': 0.3614988978692138,
'r': 0.7897271268057785},
{'a': 0.6826923076923077,
'f1': 0.5428424833247819,
'p': 0.41884402216943784,
'r': 0.7711370262390671},
{'a': 0.7665213015766521,
'f1': 0.7015437392795882,
'p': 0.621580547112462,
'r': 0.8051181102362205},
{'a': 0.7759719566602932,
'f1': 0.7356148928168484,
'p': 0.6694045174537988,
'r': 0.8163606010016694},
{'a': 0.7036881810561609,
'f1': 0.4532095901005414,
'p': 0.32447397563676633,
'r': 0.7512820512820513},
{'a': 0.7713230355943587,
'f1': 0.6939325842696629,
'p': 0.628152969894223,
'r': 0.7751004016064257},
{'a': 0.6798048048048048,
'f1': 0.5066512434933488,
'p': 0.3721325403568394,
'r': 0.7934782608695652},
{'a': 0.6916354556803995,
'f1': 0.43650190114068443,
'p': 0.30434782608695654,
'r': 0.771505376344086}],
'news.bbc.co.uk;2015': [{'a': 0.5555871077665767,
'f1': 0.23658536585365852,
'p': 0.14049826187717265,
'r': 0.7484567901234568},
{'a': 0.5497448979591837,
'f1': 0.20236003012804415,
'p': 0.11711711711711711,
'r': 0.7435424354243543},
{'a': 0.563236936825121,
'f1': 0.3763955342902711,
'p': 0.24402573529411764,
'r': 0.8226181254841208},
{'a': 0.5506756756756757,
'f1': 0.14048890137679126,
'p': 0.07836990595611286,
'r': 0.6775067750677507},
{'a': 0.5788987191337647,
'f1': 0.36838978015448604,
'p': 0.23779084633086167,
'r': 0.8172231985940246},
{'a': 0.5673118905545045,
'f1': 0.1927776269345642,
'p': 0.11216429699842022,
'r': 0.6853281853281853},
{'a': 0.5444834855938159,
'f1': 0.19995062947420392,
'p': 0.11571428571428571,
'r': 0.73502722323049},
{'a': 0.5694312474548663,
'f1': 0.2543488481429243,
'p': 0.15510321100917432,
'r': 0.706266318537859},
{'a': 0.5528025381477565,
'f1': 0.11217756448710257,
'p': 0.061131088591042826,
'r': 0.68},
{'a': 0.5627456909585727,
'f1': 0.1770062606715993,
'p': 0.10103963612735542,
'r': 0.713302752293578},
{'a': 0.48321324543921507,
'f1': 0.06335093081411503,
'p': 0.03328467153284671,
'r': 0.6551724137931034},
{'a': 0.5497154836777478,
'f1': 0.08183206106870229,
'p': 0.043762246897452645,
'r': 0.6291079812206573},
{'a': 0.558078141499472,
'f1': 0.2945638432364096,
'p': 0.1813700051894136,
'r': 0.7836322869955157},
{'a': 0.5500292568753657,
'f1': 0.15864332603938733,
'p': 0.08876645240281604,
'r': 0.7455012853470437},
{'a': 0.5471483430521974,
'f1': 0.1939240506329114,
'p': 0.1114019778941245,
'r': 0.748046875},
{'a': 0.5554911619820342,
'f1': 0.1520176893311222,
'p': 0.08550995024875623,
'r': 0.6840796019900498},
{'a': 0.5763792625450513,
'f1': 0.26503126503126506,
'p': 0.1626808385001476,
'r': 0.7146562905317769},
{'a': 0.5585113353426812,
'f1': 0.31202777210537064,
'p': 0.19435258204019334,
'r': 0.7908902691511387},
{'a': 0.5703114281794485,
'f1': 0.30182790905037893,
'p': 0.18701657458563536,
'r': 0.7817551963048499},
{'a': 0.5642479213907785,
'f1': 0.25210810810810813,
'p': 0.1531791907514451,
'r': 0.7118437118437119},
{'a': 0.5640256959314776,
'f1': 0.23726273726273728,
'p': 0.14061574896388396,
'r': 0.7587859424920128},
{'a': 0.5690190257725003,
'f1': 0.2325804901489668,
'p': 0.1399652978600347,
'r': 0.6875},
{'a': 0.5737860137968348,
'f1': 0.3251231527093596,
'p': 0.2042518837459634,
'r': 0.7964323189926548},
{'a': 0.5553221288515406,
'f1': 0.24530544330877105,
'p': 0.1462999716472923,
'r': 0.7588235294117647},
{'a': 0.5427863292460214,
'f1': 0.1616838077015068,
'p': 0.09164859002169197,
'r': 0.6855983772819473}],
'news.yahoo.com;2000': [{'a': 0.9225543478260869,
'f1': 0.9279393173198484,
'p': 0.9607329842931938,
'r': 0.8973105134474327},
{'a': 0.8682457438934122,
'f1': 0.8540983606557376,
'p': 0.837620578778135,
'r': 0.8712374581939799},
{'a': 0.9199632014719411,
'f1': 0.9085173501577286,
'p': 0.9230769230769231,
'r': 0.8944099378881988},
{'a': 0.8974093264248705,
'f1': 0.8626907073509015,
'p': 0.8405405405405405,
'r': 0.886039886039886},
{'a': 0.9158878504672897,
'f1': 0.8979591836734695,
'p': 0.9145496535796767,
'r': 0.8819599109131403},
{'a': 0.9133271202236719,
'f1': 0.8963210702341137,
'p': 0.919908466819222,
'r': 0.8739130434782608},
{'a': 0.8031042128603104,
'f1': 0.8314350797266515,
'p': 0.909468438538206,
'r': 0.7657342657342657},
{'a': 0.8618903754855416,
'f1': 0.8683127572016461,
'p': 0.8591205211726385,
'r': 0.8777038269550749},
{'a': 0.8721804511278195,
'f1': 0.859338061465721,
'p': 0.8644470868014269,
'r': 0.854289071680376},
{'a': 0.877562028047465,
'f1': 0.8540192926045016,
'p': 0.8634590377113134,
'r': 0.8447837150127226},
{'a': 0.8776978417266187,
'f1': 0.8478747203579418,
'p': 0.838495575221239,
'r': 0.8574660633484162},
{'a': 0.920123839009288,
'f1': 0.9315649867374005,
'p': 0.9430719656283566,
'r': 0.9203354297693921},
{'a': 0.8999055712936733,
'f1': 0.8582887700534759,
'p': 0.856,
'r': 0.8605898123324397},
{'a': 0.9074235807860263,
'f1': 0.8960784313725491,
'p': 0.8943248532289628,
'r': 0.8978388998035364},
{'a': 0.8967314069161535,
'f1': 0.9213564213564215,
'p': 0.8948843728100911,
'r': 0.9494423791821561},
{'a': 0.8570649208947081,
'f1': 0.844418052256532,
'p': 0.8535414165666266,
'r': 0.8354876615746181},
{'a': 0.9123867069486404,
'f1': 0.8642745709828393,
'p': 0.9111842105263158,
'r': 0.8219584569732937},
{'a': 0.8925714285714286,
'f1': 0.8164062500000001,
'p': 0.8038461538461539,
'r': 0.8293650793650794},
{'a': 0.9205298013245033,
'f1': 0.9025522041763342,
'p': 0.9131455399061033,
'r': 0.8922018348623854},
{'a': 0.8363959691760522,
'f1': 0.7730263157894737,
'p': 0.7617504051863857,
'r': 0.7846410684474123},
{'a': 0.8599656357388317,
'f1': 0.6433260393873085,
'p': 0.6099585062240664,
'r': 0.6805555555555556},
{'a': 0.90089358245329,
'f1': 0.8842504743833016,
'p': 0.8944337811900192,
'r': 0.874296435272045},
{'a': 0.8973544973544973,
'f1': 0.8422764227642278,
'p': 0.8248407643312102,
'r': 0.8604651162790697},
{'a': 0.9201773835920177,
'f1': 0.924791086350975,
'p': 0.924791086350975,
'r': 0.924791086350975},
{'a': 0.9150214592274678,
'f1': 0.9247148288973385,
'p': 0.8928046989720999,
'r': 0.9589905362776026}],
'news.yahoo.com;2005': [{'a': 0.826288899210404,
'f1': 0.8064182194616977,
'p': 0.7369914853358562,
'r': 0.8902857142857142},
{'a': 0.7051349920592906,
'f1': 0.6634441087613293,
'p': 0.5294117647058824,
'r': 0.8883495145631068},
{'a': 0.8285714285714286,
'f1': 0.827937095282146,
'p': 0.766923736075407,
'r': 0.8994974874371859},
{'a': 0.7835408022130014,
'f1': 0.7016205910390848,
'p': 0.5832012678288431,
'r': 0.8803827751196173},
{'a': 0.7626582278481012,
'f1': 0.5689655172413792,
'p': 0.42950108459869846,
'r': 0.8425531914893617},
{'a': 0.8167247386759582,
'f1': 0.7674624226348364,
'p': 0.6625954198473283,
'r': 0.9117647058823529},
{'a': 0.6875776397515528,
'f1': 0.5755274261603376,
'p': 0.4289308176100629,
'r': 0.8743589743589744},
{'a': 0.7848872638634978,
'f1': 0.7252918287937743,
'p': 0.6044098573281452,
'r': 0.9066147859922179},
{'a': 0.915273132664437,
'f1': 0.8756137479541735,
'p': 0.8784893267651889,
'r': 0.8727569331158238},
{'a': 0.7367066895368782,
'f1': 0.7725925925925926,
'p': 0.6530995616781465,
'r': 0.9456029011786038},
{'a': 0.7989203778677463,
'f1': 0.6179487179487179,
'p': 0.5205183585313174,
'r': 0.7602523659305994},
{'a': 0.6863844977052524,
'f1': 0.7047527604416708,
'p': 0.5707620528771384,
'r': 0.9209535759096612},
{'a': 0.9134049186006234,
'f1': 0.9062265566391597,
'p': 0.94375,
'r': 0.8715728715728716},
{'a': 0.8586535072259429,
'f1': 0.8875175315568024,
'p': 0.8401486988847584,
'r': 0.9405469678953626},
{'a': 0.8676420551207894,
'f1': 0.8979805927091529,
'p': 0.852165256346441,
'r': 0.9490022172949002},
{'a': 0.7374233128834355,
'f1': 0.7720170454545455,
'p': 0.6516786570743405,
'r': 0.9468641114982579},
{'a': 0.7749169435215947,
'f1': 0.47984644913627633,
'p': 0.36231884057971014,
'r': 0.7102272727272727},
{'a': 0.7755662319835278,
'f1': 0.6765578635014837,
'p': 0.5652892561983471,
'r': 0.8423645320197044},
{'a': 0.8507462686567164,
'f1': 0.8557692307692308,
'p': 0.7837573385518591,
'r': 0.9423529411764706},
{'a': 0.8466257668711656,
'f1': 0.8619957537154989,
'p': 0.79296875,
'r': 0.9441860465116279},
{'a': 0.6668734491315137,
'f1': 0.5095890410958904,
'p': 0.3661417322834646,
'r': 0.8378378378378378},
{'a': 0.8207322872087494,
'f1': 0.7851851851851852,
'p': 0.7298728813559322,
'r': 0.8495684340320592},
{'a': 0.8586556169429097,
'f1': 0.8662309368191722,
'p': 0.8174342105263158,
'r': 0.9212233549582948},
{'a': 0.6675409836065573,
'f1': 0.5053658536585366,
'p': 0.36022253129346316,
'r': 0.8464052287581699},
{'a': 0.8410117434507678,
'f1': 0.8462882096069869,
'p': 0.7795655671761866,
'r': 0.9255014326647565}],
'news.yahoo.com;2010': [{'a': 0.8125,
'f1': 0.7473982970671712,
'p': 0.6954225352112676,
'r': 0.8077709611451943},
{'a': 0.792352371732817,
'f1': 0.49230769230769234,
'p': 0.37749546279491836,
'r': 0.7074829931972789},
{'a': 0.7949526813880127,
'f1': 0.6627756160830091,
'p': 0.5774011299435028,
'r': 0.7777777777777778},
{'a': 0.7983315197678637,
'f1': 0.7148717948717948,
'p': 0.6606635071090048,
'r': 0.7787709497206704},
{'a': 0.8179453836150845,
'f1': 0.7824397824397824,
'p': 0.7437223042836041,
'r': 0.8254098360655737},
{'a': 0.801693404634581,
'f1': 0.6120313862249346,
'p': 0.5043103448275862,
'r': 0.7782705099778271},
{'a': 0.8084656084656084,
'f1': 0.7454289732770745,
'p': 0.6824034334763949,
'r': 0.8212809917355371},
{'a': 0.7891472868217054,
'f1': 0.6472114137483787,
'p': 0.558165548098434,
'r': 0.7700617283950617},
{'a': 0.8128453038674033,
'f1': 0.7665805340223945,
'p': 0.7212317666126418,
'r': 0.8180147058823529},
{'a': 0.7882805816937554,
'f1': 0.5607808340727595,
'p': 0.4520743919885551,
'r': 0.7383177570093458},
{'a': 0.7756706753006476,
'f1': 0.41495778045838355,
'p': 0.29965156794425085,
'r': 0.6745098039215687},
{'a': 0.7818003913894325,
'f1': 0.4677804295942721,
'p': 0.3391003460207612,
'r': 0.7538461538461538},
{'a': 0.8298582151793161,
'f1': 0.8300943920044419,
'p': 0.806799784133837,
'r': 0.8547741566609491},
{'a': 0.7997992975413949,
'f1': 0.4017991004497751,
'p': 0.2809224318658281,
'r': 0.7052631578947368},
{'a': 0.8048540505083634,
'f1': 0.7550432276657061,
'p': 0.7005347593582888,
'r': 0.81875},
{'a': 0.8002373417721519,
'f1': 0.6918852959121414,
'p': 0.6203501094091903,
'r': 0.7820689655172414},
{'a': 0.7970494417862839,
'f1': 0.6688353936239428,
'p': 0.5881006864988558,
'r': 0.77526395173454},
{'a': 0.7986111111111112,
'f1': 0.6393562545720556,
'p': 0.5588235294117647,
'r': 0.747008547008547},
{'a': 0.799609375,
'f1': 0.673871582962492,
'p': 0.6057142857142858,
'r': 0.7593123209169055},
{'a': 0.7792207792207793,
'f1': 0.6678507992895204,
'p': 0.5784615384615385,
'r': 0.7899159663865546},
{'a': 0.8131220051603391,
'f1': 0.7050610820244329,
'p': 0.637223974763407,
'r': 0.7890625},
{'a': 0.7875927174645988,
'f1': 0.7403132728771641,
'p': 0.6834094368340944,
'r': 0.8075539568345323},
{'a': 0.8000719165767709,
'f1': 0.728780487804878,
'p': 0.6772438803263826,
'r': 0.7888067581837381},
{'a': 0.8107739515854074,
'f1': 0.754750331418471,
'p': 0.7,
'r': 0.8187919463087249},
{'a': 0.8030973451327433,
'f1': 0.5250800426894343,
'p': 0.41765704584040747,
'r': 0.7068965517241379}],
'news.yahoo.com;2015': [{'a': 0.3048423700544117,
'f1': 0.035215543412264724,
'p': 0.018075574600701208,
'r': 0.6803519061583577},
{'a': 0.34525586353944565,
'f1': 0.025003968883949835,
'p': 0.012745296378717378,
'r': 0.6548856548856549},
{'a': 0.2560697667057073,
'f1': 0.01878796735068785,
'p': 0.009526228883525974,
'r': 0.6766917293233082},
{'a': 0.3435495898583147,
'f1': 0.04774095842498107,
'p': 0.024713480419606526,
'r': 0.6997885835095138},
{'a': 0.38345512460183623,
'f1': 0.02623921085080148,
'p': 0.013366162504396765,
'r': 0.7112299465240641},
{'a': 0.3304576046566016,
'f1': 0.06543344214726152,
'p': 0.03422760217053087,
'r': 0.7411668036154478},
{'a': 0.3483621870718645,
'f1': 0.052654450640979206,
'p': 0.027355508729680914,
'r': 0.7003853564547207},
{'a': 0.3494353008685673,
'f1': 0.056013927787449846,
'p': 0.029069767441860465,
'r': 0.7660455486542443},
{'a': 0.2613521237506237,
'f1': 0.03416572750459695,
'p': 0.017480678185570347,
'r': 0.7506516072980017},
{'a': 0.38774996063612027,
'f1': 0.028579994004197064,
'p': 0.014579934747145187,
'r': 0.7185929648241206},
{'a': 0.2916898903840539,
'f1': 0.008692099104788083,
'p': 0.004374976629398347,
'r': 0.6573033707865169},
{'a': 0.34022892717958775,
'f1': 0.036302448804238864,
'p': 0.01866166077738516,
'r': 0.6636125654450262},
{'a': 0.3620855236554792,
'f1': 0.04883747220861439,
'p': 0.02526020348497252,
'r': 0.7330316742081447},
{'a': 0.33174694993689524,
'f1': 0.04314596588983848,
'p': 0.022292250233426705,
'r': 0.6686114352392065},
{'a': 0.24957875777119284,
'f1': 0.029311187103077677,
'p': 0.014970059880239521,
'r': 0.6976744186046512},
{'a': 0.3363527076518773,
'f1': 0.022886309376800855,
'p': 0.011643979057591623,
'r': 0.6634844868735084},
{'a': 0.3411867364746946,
'f1': 0.03392680875955105,
'p': 0.01741349545898071,
'r': 0.6562942008486563},
{'a': 0.2571102978941962,
'f1': 0.013344418153524759,
'p': 0.00674612582710089,
'r': 0.6089494163424124},
{'a': 0.3310363836824697,
'f1': 0.03394246426632894,
'p': 0.017437537180249853,
'r': 0.6346414073071719},
{'a': 0.34860527514807876,
'f1': 0.04199240562876927,
'p': 0.02163157289149695,
'r': 0.714828897338403},
{'a': 0.33099696356275304,
'f1': 0.01856045139017781,
'p': 0.009425426029256523,
'r': 0.6024096385542169},
{'a': 0.2689490523443717,
'f1': 0.024784973026843165,
'p': 0.012613009922822492,
'r': 0.7087980173482032},
{'a': 0.34358827597720065,
'f1': 0.07051195461299474,
'p': 0.03700552956188856,
'r': 0.7457142857142857},
{'a': 0.34750822755054067,
'f1': 0.06406581919951444,
'p': 0.03352152434721242,
'r': 0.721336370539104},
{'a': 0.25777743020254945,
'f1': 0.010921672433198549,
'p': 0.005512423993772223,
'r': 0.5835189309576837}],
'thenation.com;2000': [{'a': 0.948016415868673,
'f1': 0.963035019455253,
'p': 0.9611650485436893,
'r': 0.9649122807017544},
{'a': 0.900839054157132,
'f1': 0.9214975845410629,
'p': 0.8965922444183314,
'r': 0.9478260869565217},
{'a': 0.8160337552742616,
'f1': 0.844950213371266,
'p': 0.75,
'r': 0.9674267100977199},
{'a': 0.9159420289855073,
'f1': 0.9504950495049507,
'p': 0.9200264375413086,
'r': 0.9830508474576272},
{'a': 0.7521212121212121,
'f1': 0.8069844266163284,
'p': 0.6939935064935064,
'r': 0.963923337091319},
{'a': 0.8325673013788575,
'f1': 0.8754274548119199,
'p': 0.7950310559006211,
'r': 0.9739130434782609},
{'a': 0.9291457286432161,
'f1': 0.9593190998268898,
'p': 0.9308510638297872,
'r': 0.9895833333333334},
{'a': 0.9315551082033215,
'f1': 0.9603960396039604,
'p': 0.9371980676328503,
'r': 0.9847715736040609},
{'a': 0.8867091711623345,
'f1': 0.925459825750242,
'p': 0.8749237339841367,
'r': 0.9821917808219178},
{'a': 0.9161966156325544,
'f1': 0.9312169312169312,
'p': 0.9130998702983139,
'r': 0.9500674763832658},
{'a': 0.9013710747456878,
'f1': 0.9372007885102787,
'p': 0.8946236559139785,
'r': 0.984033116499113},
{'a': 0.7989382879893829,
'f1': 0.8438948995363215,
'p': 0.7465815861440291,
'r': 0.9703791469194313},
{'a': 0.92643391521197,
'f1': 0.8747346072186836,
'p': 0.8841201716738197,
'r': 0.865546218487395},
{'a': 0.9326113116726835,
'f1': 0.8828451882845187,
'p': 0.8865546218487395,
'r': 0.8791666666666667},
{'a': 0.929305912596401,
'f1': 0.9499089253187614,
'p': 0.924645390070922,
'r': 0.9765917602996255},
{'a': 0.8337819650067295,
'f1': 0.8753154972236243,
'p': 0.7903372835004557,
'r': 0.9807692307692307},
{'a': 0.9232209737827716,
'f1': 0.9459815546772069,
'p': 0.9220890410958904,
'r': 0.9711451758340848},
{'a': 0.9671549045716822,
'f1': 0.9795353982300885,
'p': 0.9838888888888889,
'r': 0.9752202643171806},
{'a': 0.8517538054268696,
'f1': 0.8909444985394352,
'p': 0.8198924731182796,
'r': 0.9754797441364605},
{'a': 0.9136400322841001,
'f1': 0.9288090485695276,
'p': 0.9148099606815203,
'r': 0.9432432432432433},
{'a': 0.949293246578416,
'f1': 0.9712248535777948,
'p': 0.9556502129792032,
'r': 0.9873155578565881},
{'a': 0.9605055292259084,
'f1': 0.9787835926449787,
'p': 0.9651324965132496,
'r': 0.9928263988522238},
{'a': 0.9318840579710145,
'f1': 0.9566020313942751,
'p': 0.9316546762589928,
'r': 0.9829222011385199},
{'a': 0.847394540942928,
'f1': 0.8894878706199462,
'p': 0.8256880733944955,
'r': 0.9639727361246349},
{'a': 0.8889570552147239,
'f1': 0.9224174882126017,
'p': 0.8762214983713354,
'r': 0.9737556561085973}],
'thenation.com;2005': [{'a': 0.7414854329093147,
'f1': 0.8374613003095975,
'p': 0.7300944669365722,
'r': 0.9818511796733213},
{'a': 0.5137777777777778,
'f1': 0.6188153310104529,
'p': 0.4563206577595067,
'r': 0.961038961038961},
{'a': 0.7658473479948253,
'f1': 0.852725793327909,
'p': 0.7561327561327561,
'r': 0.9776119402985075},
{'a': 0.849610270518111,
'f1': 0.9101861993428259,
'p': 0.8483920367534457,
'r': 0.9816893089190786},
{'a': 0.5731292517006803,
'f1': 0.6714659685863875,
'p': 0.5202839756592292,
'r': 0.9464944649446494},
{'a': 0.9057798891528107,
'f1': 0.9227774172615184,
'p': 0.8876404494382022,
'r': 0.9608108108108108},
{'a': 0.7099871959026889,
'f1': 0.8026143790849674,
'p': 0.6842496285289748,
'r': 0.9704952581664911},
{'a': 0.8706038487060385,
'f1': 0.8898927159796725,
'p': 0.8668866886688669,
'r': 0.91415313225058},
{'a': 0.7269180754226268,
'f1': 0.8161120840630474,
'p': 0.7039274924471299,
'r': 0.9708333333333333},
{'a': 0.9004950495049505,
'f1': 0.9133247089262614,
'p': 0.9168831168831169,
'r': 0.9097938144329897},
{'a': 0.826677994902294,
'f1': 0.8794326241134752,
'p': 0.8275862068965517,
'r': 0.9382093316519546},
{'a': 0.6467889908256881,
'f1': 0.7636224098234843,
'p': 0.6269691241335854,
'r': 0.9764474975466143},
{'a': 0.8954685890834192,
'f1': 0.913946587537092,
'p': 0.9120135363790186,
'r': 0.9158878504672897},
{'a': 0.7067342505430847,
'f1': 0.8055688910225637,
'p': 0.6894001643385373,
'r': 0.9688221709006929},
{'a': 0.7941558441558442,
'f1': 0.8731492597038816,
'p': 0.7905797101449276,
'r': 0.9749776586237712},
{'a': 0.9431714023831348,
'f1': 0.9671610169491526,
'p': 0.9620653319283456,
'r': 0.972310969116081},
{'a': 0.8959881129271917,
'f1': 0.9042407660738714,
'p': 0.8789893617021277,
'r': 0.9309859154929577},
{'a': 0.9060481503229595,
'f1': 0.9272727272727272,
'p': 0.9082813891362422,
'r': 0.947075208913649},
{'a': 0.8342046303211351,
'f1': 0.8497970230040597,
'p': 0.7733990147783252,
'r': 0.9429429429429429},
{'a': 0.8912901113294041,
'f1': 0.9011904761904763,
'p': 0.8822843822843823,
'r': 0.9209245742092458},
{'a': 0.842873831775701,
'f1': 0.8663686040735221,
'p': 0.8126747437092264,
'r': 0.9276595744680851},
{'a': 0.905373831775701,
'f1': 0.9209756097560975,
'p': 0.9129593810444874,
'r': 0.9291338582677166},
{'a': 0.865615141955836,
'f1': 0.8735905044510386,
'p': 0.8382687927107062,
'r': 0.9120198265179678},
{'a': 0.8798283261802575,
'f1': 0.89937106918239,
'p': 0.8674176776429809,
'r': 0.933768656716418},
{'a': 0.7283018867924528,
'f1': 0.8226600985221675,
'p': 0.712457337883959,
'r': 0.9731934731934732}],
'thenation.com;2010': [{'a': 0.675764192139738,
'f1': 0.6285178236397749,
'p': 0.489766081871345,
'r': 0.8769633507853403},
{'a': 0.6671180931744312,
'f1': 0.6212634822804315,
'p': 0.4818355640535373,
'r': 0.8742411101474414},
{'a': 0.6322725012431626,
'f1': 0.586756077116513,
'p': 0.44043624161073824,
'r': 0.8786610878661087},
{'a': 0.7086073777523592,
'f1': 0.647282796815507,
'p': 0.5114879649890591,
'r': 0.88124410933082},
{'a': 0.7182883341823739,
'f1': 0.7089473684210525,
'p': 0.5836221837088388,
'r': 0.9028150134048257},
{'a': 0.8147023086269745,
'f1': 0.45045045045045046,
'p': 0.3246753246753247,
'r': 0.7352941176470589},
{'a': 0.657844387755102,
'f1': 0.49695264885138307,
'p': 0.3559435862995299,
'r': 0.8229813664596274},
{'a': 0.4285228624851266,
'f1': 0.40663607483233327,
'p': 0.2612244897959184,
'r': 0.9171974522292994},
{'a': 0.8819702602230484,
'f1': 0.9008973858759267,
'p': 0.8726379440665155,
'r': 0.9310483870967742},
{'a': 0.8352638352638353,
'f1': 0.787551867219917,
'p': 0.7684210526315789,
'r': 0.8076595744680851},
{'a': 0.7230172927847347,
'f1': 0.6279535442531037,
'p': 0.5275908479138627,
'r': 0.7754698318496538},
{'a': 0.6940684223480187,
'f1': 0.6834733893557422,
'p': 0.5502255022550225,
'r': 0.9018817204301075},
{'a': 0.6265653869841922,
'f1': 0.6345187864175206,
'p': 0.48584615384615387,
'r': 0.9143022582513028},
{'a': 0.6097623966942148,
'f1': 0.5624094989863887,
'p': 0.4090143218197136,
'r': 0.8999073215940686},
{'a': 0.7384384384384385,
'f1': 0.6553225168183617,
'p': 0.553475935828877,
'r': 0.8031037827352085},
{'a': 0.5977851083883129,
'f1': 0.5124250214224507,
'p': 0.3676229508196721,
'r': 0.8454288407163054},
{'a': 0.7416363034117257,
'f1': 0.6842105263157894,
'p': 0.5588624338624338,
'r': 0.8820459290187892},
{'a': 0.7345368452204795,
'f1': 0.712592117910926,
'p': 0.5868073878627968,
'r': 0.9070146818923328},
{'a': 0.36462324393358875,
'f1': 0.40454817474566124,
'p': 0.2586404795306721,
'r': 0.9281464530892448},
{'a': 0.5604063701263042,
'f1': 0.4226469527587451,
'p': 0.28309178743961355,
'r': 0.833570412517781},
{'a': 0.5278008298755187,
'f1': 0.4557627929220469,
'p': 0.31123448726322667,
'r': 0.8508928571428571},
{'a': 0.8262844166903207,
'f1': 0.80875,
'p': 0.7398513436249285,
'r': 0.8917987594762233},
{'a': 0.5724090597117364,
'f1': 0.5379480840543882,
'p': 0.3837742504409171,
'r': 0.8991735537190083},
{'a': 0.7998363785110445,
'f1': 0.78475073313783,
'p': 0.6904024767801857,
'r': 0.9089673913043478},
{'a': 0.7134107027724049,
'f1': 0.6189455636519503,
'p': 0.4935064935064935,
'r': 0.8298850574712644}],
'thenation.com;2015': [{'a': 0.7011661807580175,
'f1': 0.7466007416563658,
'p': 0.631578947368421,
'r': 0.9128463476070529},
{'a': 0.6158984635938544,
'f1': 0.5607333842627961,
'p': 0.42305475504322765,
'r': 0.8312570781426953},
{'a': 0.6486733760292772,
'f1': 0.6437847866419295,
'p': 0.5090464547677261,
'r': 0.8755256518082423},
{'a': 0.7768453502312039,
'f1': 0.83955177933752,
'p': 0.7604282846308276,
'r': 0.9370533260032985},
{'a': 0.6675358539765319,
'f1': 0.6966452533904354,
'p': 0.567222006974041,
'r': 0.9025893958076449},
{'a': 0.6472923164162178,
'f1': 0.6589912280701754,
'p': 0.5306843267108168,
'r': 0.8691250903832248},
{'a': 0.6458094144661309,
'f1': 0.6523943661971832,
'p': 0.5220919747520288,
'r': 0.8693693693693694},
{'a': 0.543138866064092,
'f1': 0.3110285006195787,
'p': 0.19670846394984326,
'r': 0.742603550295858},
{'a': 0.6071055381400209,
'f1': 0.5534441805225654,
'p': 0.40853302162478083,
'r': 0.8576687116564418},
{'a': 0.6504384638645297,
'f1': 0.6545128511655709,
'p': 0.5179754020813624,
'r': 0.8887987012987013},
{'a': 0.5240253853127833,
'f1': 0.14634146341463414,
'p': 0.08272058823529412,
'r': 0.6338028169014085},
{'a': 0.6274137385248496,
'f1': 0.5923103567717354,
'p': 0.45943041375604515,
'r': 0.8333333333333334},
{'a': 0.650899593731863,
'f1': 0.6559908492993995,
'p': 0.5256645279560037,
'r': 0.8722433460076046},
{'a': 0.6512681159420289,
'f1': 0.6526315789473685,
'p': 0.514218009478673,
'r': 0.8930041152263375},
{'a': 0.6519756838905775,
'f1': 0.6560528687293481,
'p': 0.5182724252491694,
'r': 0.8936170212765957},
{'a': 0.5417523652817771,
'f1': 0.2967171717171717,
'p': 0.18905872888173772,
'r': 0.6891495601173021},
{'a': 0.7307525010874293,
'f1': 0.7867723045125732,
'p': 0.6813842482100239,
'r': 0.9307253463732681},
{'a': 0.7786984031334739,
'f1': 0.8467716699697507,
'p': 0.7636876763875823,
'r': 0.9501404494382022},
{'a': 0.7297186280550421,
'f1': 0.7874677002583979,
'p': 0.6924169270093723,
'r': 0.9127667540247099},
{'a': 0.7475834397227795,
'f1': 0.8104109589041095,
'p': 0.7160493827160493,
'r': 0.9334174818554749},
{'a': 0.6403210867551713,
'f1': 0.6274384393987849,
'p': 0.49520444220090865,
'r': 0.856020942408377},
{'a': 0.6655328798185941,
'f1': 0.6894736842105262,
'p': 0.561990561990562,
'r': 0.8917631041524847},
{'a': 0.7591199699135013,
'f1': 0.8191444303261329,
'p': 0.7310987903225806,
'r': 0.9313001605136436},
{'a': 0.5442651548190144,
'f1': 0.23218221895664953,
'p': 0.14044444444444446,
'r': 0.6694915254237288},
{'a': 0.612482853223594,
'f1': 0.5592823712948518,
'p': 0.41589327146171695,
'r': 0.8535714285714285}],
'www.cnn.com;2000': [{'a': 0.8130899937067338,
'f1': 0.7341092211280216,
'p': 0.68561872909699,
'r': 0.789980732177264},
{'a': 0.7033918691363964,
'f1': 0.7619231511874879,
'p': 0.6567909454061251,
'r': 0.9071264367816092},
{'a': 0.9567706842255941,
'f1': 0.9742628259757967,
'p': 0.9855172413793103,
'r': 0.963262554769127},
{'a': 0.7296494355317885,
'f1': 0.6033129904097646,
'p': 0.4798890429958391,
'r': 0.812206572769953},
{'a': 0.8298865910607072,
'f1': 0.7038327526132403,
'p': 0.62217659137577,
'r': 0.8101604278074866},
{'a': 0.8375254928619986,
'f1': 0.8686813186813187,
'p': 0.8187467633350596,
'r': 0.92510239906378},
{'a': 0.7469262295081968,
'f1': 0.77255985267035,
'p': 0.6549570647931303,
'r': 0.941638608305275},
{'a': 0.812,
'f1': 0.8061056105610561,
'p': 0.8196308724832215,
'r': 0.7930194805194806},
{'a': 0.7922141119221411,
'f1': 0.7634349030470916,
'p': 0.7638580931263859,
'r': 0.7630121816168328},
{'a': 0.8499701135684399,
'f1': 0.8163862472567666,
'p': 0.7994269340974212,
'r': 0.8340807174887892},
{'a': 0.7581291759465479,
'f1': 0.7517146776406034,
'p': 0.648776637726914,
'r': 0.8934782608695652},
{'a': 0.8777838131450298,
'f1': 0.8659916617033949,
'p': 0.8453488372093023,
'r': 0.8876678876678876},
{'a': 0.758496395468589,
'f1': 0.7159297395517868,
'p': 0.606776180698152,
'r': 0.8729689807976366},
{'a': 0.8103021297672115,
'f1': 0.8004168837936425,
'p': 0.7427466150870407,
'r': 0.8677966101694915},
{'a': 0.877246653919694,
'f1': 0.896551724137931,
'p': 0.8798228969006958,
'r': 0.9139290407358739},
{'a': 0.7289398280802293,
'f1': 0.6266771902131018,
'p': 0.5,
'r': 0.8393234672304439},
{'a': 0.7263533610945866,
'f1': 0.727810650887574,
'p': 0.5896452540747843,
'r': 0.9505409582689336},
{'a': 0.7041499330655957,
'f1': 0.44191919191919193,
'p': 0.30594405594405594,
'r': 0.7954545454545454},
{'a': 0.8116094986807388,
'f1': 0.7698259187620888,
'p': 0.766367137355584,
'r': 0.7733160621761658},
{'a': 0.8488805970149254,
'f1': 0.7996702390766693,
'p': 0.7601880877742947,
'r': 0.8434782608695652},
{'a': 0.8317631224764468,
'f1': 0.7093023255813954,
'p': 0.613682092555332,
'r': 0.8402203856749312},
{'a': 0.8600891861761427,
'f1': 0.8767795778105055,
'p': 0.8703703703703703,
'r': 0.8832838773491593},
{'a': 0.8866200967221923,
'f1': 0.9090909090909092,
'p': 0.9009393680614859,
'r': 0.9173913043478261},
{'a': 0.7409985597695631,
'f1': 0.7998516045260621,
'p': 0.7085113374958922,
'r': 0.9182282793867121},
{'a': 0.7255568138920347,
'f1': 0.7238890998860615,
'p': 0.6192332683560754,
'r': 0.8711151736745887}],
'www.cnn.com;2005': [{'a': 0.7973986993496749,
'f1': 0.6505608283002589,
'p': 0.5568685376661743,
'r': 0.7821576763485477},
{'a': 0.7814922480620154,
'f1': 0.6538756715272448,
'p': 0.534504391468005,
'r': 0.841897233201581},
{'a': 0.8120333772507685,
'f1': 0.7855711422845693,
'p': 0.7101449275362319,
'r': 0.8789237668161435},
{'a': 0.7939339875111507,
'f1': 0.7072243346007605,
'p': 0.6421173762945915,
'r': 0.7870239774330042},
{'a': 0.7925133689839572,
'f1': 0.6040816326530613,
'p': 0.4860426929392447,
'r': 0.7978436657681941},
{'a': 0.8149480415667466,
'f1': 0.7729279058361942,
'p': 0.701067615658363,
'r': 0.8612021857923498},
{'a': 0.7992213570634038,
'f1': 0.6518804243008679,
'p': 0.5425361155698234,
'r': 0.8164251207729468},
{'a': 0.7974481658692185,
'f1': 0.6186186186186187,
'p': 0.5132890365448505,
'r': 0.7783375314861462},
{'a': 0.8134087237479806,
'f1': 0.8023952095808382,
'p': 0.7397476340694006,
'r': 0.8766355140186916},
{'a': 0.8281767955801105,
'f1': 0.5576102418207681,
'p': 0.45794392523364486,
'r': 0.7127272727272728},
{'a': 0.8291413703382481,
'f1': 0.7895299145299146,
'p': 0.7147001934235977,
'r': 0.8818615751789977},
{'a': 0.8012170385395537,
'f1': 0.6512455516014235,
'p': 0.5414201183431953,
'r': 0.8169642857142857},
{'a': 0.7987890079180252,
'f1': 0.6940509915014165,
'p': 0.6041923551171393,
'r': 0.8153078202995009},
{'a': 0.8547993019197208,
'f1': 0.8256496227996648,
'p': 0.8047385620915033,
'r': 0.8476764199655766},
{'a': 0.8202293202293203,
'f1': 0.7898516036381045,
'p': 0.7313829787234043,
'r': 0.858480749219563},
{'a': 0.7980817768803634,
'f1': 0.6563573883161512,
'p': 0.5568513119533528,
'r': 0.799163179916318},
{'a': 0.8632313056954669,
'f1': 0.8067870826491517,
'p': 0.7543500511770727,
'r': 0.8670588235294118},
{'a': 0.7817047817047817,
'f1': 0.7172859450726979,
'p': 0.6195348837209302,
'r': 0.8516624040920716},
{'a': 0.7955215085444903,
'f1': 0.6320254506892895,
'p': 0.5173611111111112,
'r': 0.8119891008174387},
{'a': 0.7862723214285714,
'f1': 0.5379975874547648,
'p': 0.41838649155722324,
'r': 0.7533783783783784},
{'a': 0.8092676872155565,
'f1': 0.7634684453565932,
'p': 0.6914498141263941,
'r': 0.852233676975945},
{'a': 0.8084622383985441,
'f1': 0.7710712343665034,
'p': 0.6910331384015594,
'r': 0.8720787207872078},
{'a': 0.819971870604782,
'f1': 0.7408906882591093,
'p': 0.6428571428571429,
'r': 0.8742038216560509},
{'a': 0.8246376811594203,
'f1': 0.6657458563535911,
'p': 0.5751789976133651,
'r': 0.7901639344262295},
{'a': 0.8191964285714286,
'f1': 0.8329896907216495,
'p': 0.7816473189607518,
'r': 0.8915510718789408}],
'www.cnn.com;2010': [{'a': 0.7275031685678074,
'f1': 0.7248880358285349,
'p': 0.6343784994400896,
'r': 0.8455223880597015},
{'a': 0.6324081020255063,
'f1': 0.5346628679962013,
'p': 0.39900779588944013,
'r': 0.8100719424460432},
{'a': 0.7277505255781359,
'f1': 0.7082238077356365,
'p': 0.6224422442244224,
'r': 0.8214285714285714},
{'a': 0.6537997587454765,
'f1': 0.702127659574468,
'p': 0.5600165562913907,
'r': 0.9408901251738526},
{'a': 0.5586563307493541,
'f1': 0.14600000000000002,
'p': 0.09193954659949623,
'r': 0.35436893203883496},
{'a': 0.6614678899082569,
'f1': 0.4728571428571429,
'p': 0.338100102145046,
'r': 0.7862232779097387},
{'a': 0.45656706045865186,
'f1': 0.3919129082426127,
'p': 0.25237856785177765,
'r': 0.8765217391304347},
{'a': 0.6976923076923077,
'f1': 0.6330532212885154,
'p': 0.5191424196018377,
'r': 0.8110047846889952},
{'a': 0.6745749308026888,
'f1': 0.6304445442299056,
'p': 0.5254491017964071,
'r': 0.7878787878787878},
{'a': 0.7147385103011094,
'f1': 0.7341996455995274,
'p': 0.646049896049896,
'r': 0.8502051983584131},
{'a': 0.6349760139555168,
'f1': 0.39303843364757074,
'p': 0.26965174129353237,
'r': 0.7245989304812834},
{'a': 0.6150234741784038,
'f1': 0.5858585858585859,
'p': 0.4628307433851323,
'r': 0.7979724837074583},
{'a': 0.6288178224937119,
'f1': 0.5872952457051538,
'p': 0.4596622889305816,
'r': 0.8130530973451328},
{'a': 0.660952380952381,
'f1': 0.6959863364645602,
'p': 0.5705285264263213,
'r': 0.8921729611384783},
{'a': 0.21855983772819473,
'f1': 0.10666666666666667,
'p': 0.05723172628304821,
'r': 0.7829787234042553},
{'a': 0.6026184058529072,
'f1': 0.45454545454545453,
'p': 0.3225806451612903,
'r': 0.7692307692307693},
{'a': 0.6807069219440354,
'f1': 0.7090713902308106,
'p': 0.6051305542830967,
'r': 0.8561244329228775},
{'a': 0.605606258148631,
'f1': 0.5437405731523378,
'p': 0.41460609545715926,
'r': 0.7897042716319824},
{'a': 0.7437995397596523,
'f1': 0.7909015025041735,
'p': 0.7159047978843974,
'r': 0.8834498834498834},
{'a': 0.5363106014886341,
'f1': 0.41541973116916053,
'p': 0.28477051460361613,
'r': 0.767572633552015},
{'a': 0.11181766218919692,
'f1': 0.0687691961944715,
'p': 0.03584817244611059,
'r': 0.8422018348623853},
{'a': 0.6964824120603015,
'f1': 0.702950819672131,
'p': 0.6025857223159078,
'r': 0.8434303697875688},
{'a': 0.6620408163265306,
'f1': 0.5460526315789473,
'p': 0.4188393608074012,
'r': 0.784251968503937},
{'a': 0.7014111610006415,
'f1': 0.681491618200479,
'p': 0.5817757009345794,
'r': 0.8224607762180016},
{'a': 0.6079678607298292,
'f1': 0.5402434236356498,
'p': 0.4105011933174224,
'r': 0.7898966704936854}],
'www.cnn.com;2015': [{'a': 0.5461303017052908,
'f1': 0.6239130434782609,
'p': 0.47385800770500824,
'r': 0.9130434782608695},
{'a': 0.30808337569903405,
'f1': 0.3601316408086506,
'p': 0.22635933806146571,
'r': 0.8804597701149425},
{'a': 0.3871693866066404,
'f1': 0.33475870494807575,
'p': 0.21076923076923076,
'r': 0.8130563798219584},
{'a': 0.237528699645168,
'f1': 0.29844440176685233,
'p': 0.1796116504854369,
'r': 0.8819523269012486},
{'a': 0.28893905191873587,
'f1': 0.34402332361516036,
'p': 0.21354705274043434,
'r': 0.8843683083511777},
{'a': 0.487090367428004,
'f1': 0.5143394452280208,
'p': 0.3690958164642375,
'r': 0.8480620155038759},
{'a': 0.44073455759599334,
'f1': 0.4450579790171176,
'p': 0.30142109199700823,
'r': 0.8502109704641351},
{'a': 0.5386666666666666,
'f1': 0.5942142298670837,
'p': 0.4439252336448598,
'r': 0.8983451536643026},
{'a': 0.21576673866090712,
'f1': 0.2719069580910367,
'p': 0.1609304533586518,
'r': 0.875968992248062},
{'a': 0.10892214434551999,
'f1': 0.07365104371799922,
'p': 0.038901601830663615,
'r': 0.6900369003690037},
{'a': 0.30060493252675663,
'f1': 0.3346613545816733,
'p': 0.2074643249176729,
'r': 0.8649885583524027},
{'a': 0.2972493345164153,
'f1': 0.21739130434782608,
'p': 0.12746234067207415,
'r': 0.738255033557047},
{'a': 0.5377104377104377,
'f1': 0.551453773276707,
'p': 0.402479732951836,
'r': 0.8755186721991701},
{'a': 0.6343705799151343,
'f1': 0.731009365244537,
'p': 0.5955913522679102,
'r': 0.9461279461279462},
{'a': 0.4623908663532572,
'f1': 0.5923096511331806,
'p': 0.4293097083794758,
'r': 0.9548440065681445},
{'a': 0.5055798156234838,
'f1': 0.5628485628485629,
'p': 0.40923268870867124,
'r': 0.9010989010989011},
{'a': 0.20300230946882217,
'f1': 0.22848200312989042,
'p': 0.13204134366925063,
'r': 0.8474295190713101},
{'a': 0.40312876052948254,
'f1': 0.36,
'p': 0.23153526970954358,
'r': 0.808695652173913},
{'a': 0.5251872021783526,
'f1': 0.5427728613569321,
'p': 0.39372325249643364,
'r': 0.8734177215189873},
{'a': 0.29772374547335745,
'f1': 0.3710910354412787,
'p': 0.235657546337158,
'r': 0.8725490196078431},
{'a': 0.2304075235109718,
'f1': 0.24980901451489684,
'p': 0.14617791685292802,
'r': 0.8582677165354331},
{'a': 0.6290977208866687,
'f1': 0.7113702623906706,
'p': 0.5729941291585127,
'r': 0.9378603459320948},
{'a': 0.4444444444444444,
'f1': 0.4135188866799205,
'p': 0.27030539311241064,
'r': 0.879492600422833},
{'a': 0.5819144911085887,
'f1': 0.6747129820429792,
'p': 0.526896551724138,
'r': 0.9378068739770867},
{'a': 0.4675090252707581,
'f1': 0.4082246740220662,
'p': 0.27352150537634407,
'r': 0.8043478260869565}],
'www.esquire.com;2000': [{'a': 0.9610738255033557,
'f1': 0.9452830188679244,
'p': 0.9488636363636364,
'r': 0.9417293233082706},
{'a': 0.9625829812914907,
'f1': 0.9585006693440428,
'p': 0.9636608344549125,
'r': 0.9533954727030626},
{'a': 0.9491106719367589,
'f1': 0.9352608422375865,
'p': 0.9649805447470817,
'r': 0.9073170731707317},
{'a': 0.9604743083003953,
'f1': 0.9330357142857143,
'p': 0.9393258426966292,
'r': 0.926829268292683},
{'a': 0.9659798754192621,
'f1': 0.9649382716049383,
'p': 0.9731075697211156,
'r': 0.9569049951028403},
{'a': 0.9618320610687023,
'f1': 0.9403578528827038,
'p': 0.946,
'r': 0.9347826086956522},
{'a': 0.8702734147760326,
'f1': 0.7635206786850478,
'p': 0.7003891050583657,
'r': 0.8391608391608392},
{'a': 0.9437291368621841,
'f1': 0.9423264907135875,
'p': 0.9698189134808853,
'r': 0.9163498098859315},
{'a': 0.9411764705882353,
'f1': 0.8988988988988988,
'p': 0.9432773109243697,
'r': 0.858508604206501},
{'a': 0.954456415279138,
'f1': 0.9556931872320154,
'p': 0.9737864077669903,
'r': 0.9382600561272217},
{'a': 0.9400584795321637,
'f1': 0.9076576576576577,
'p': 0.9372093023255814,
'r': 0.8799126637554585},
{'a': 0.9430379746835443,
'f1': 0.9165964616680706,
'p': 0.9527145359019265,
'r': 0.8831168831168831},
{'a': 0.9679519278918377,
'f1': 0.96529284164859,
'p': 0.9705561613958561,
'r': 0.9600862998921251},
{'a': 0.9479048697621744,
'f1': 0.9159049360146252,
'p': 0.9488636363636364,
'r': 0.8851590106007067},
{'a': 0.9504480759093306,
'f1': 0.9304733727810651,
'p': 0.9588414634146342,
'r': 0.9037356321839081},
{'a': 0.9373088685015291,
'f1': 0.8918205804749341,
'p': 0.9234972677595629,
'r': 0.8622448979591837},
{'a': 0.9499749874937469,
'f1': 0.9528746465598492,
'p': 0.9674641148325359,
'r': 0.9387186629526463},
{'a': 0.950109649122807,
'f1': 0.9334308705193854,
'p': 0.9579579579579579,
'r': 0.9101283880171184},
{'a': 0.9045736871823828,
'f1': 0.842203548085901,
'p': 0.803921568627451,
'r': 0.884313725490196},
{'a': 0.8260325406758448,
'f1': 0.576219512195122,
'p': 0.4833759590792839,
'r': 0.7132075471698113},
{'a': 0.9553314121037464,
'f1': 0.9345991561181434,
'p': 0.9425531914893617,
'r': 0.9267782426778243},
{'a': 0.9464387464387465,
'f1': 0.9426829268292682,
'p': 0.9650436953807741,
'r': 0.9213349225268176},
{'a': 0.943345804382683,
'f1': 0.9206586826347306,
'p': 0.9564541213063764,
'r': 0.8874458874458875},
{'a': 0.1,
'f1': 0.09999999999999999,
'p': 0.05555555555555555,
'r': 0.5},
{'a': 0.9533295389869095,
'f1': 0.9243542435424354,
'p': 0.9488636363636364,
'r': 0.9010791366906474}],
'www.esquire.com;2005': [{'a': 0.9530398322851154,
'f1': 0.9464114832535886,
'p': 0.9611273080660836,
'r': 0.9321394910461829},
{'a': 0.9465422146796776,
'f1': 0.9371884346959123,
'p': 0.9572301425661914,
'r': 0.91796875},
{'a': 0.9585714285714285,
'f1': 0.9390329362298528,
'p': 0.938375350140056,
'r': 0.9396914446002805},
{'a': 0.9636363636363636,
'f1': 0.9542857142857142,
'p': 0.9553775743707094,
'r': 0.9531963470319634},
{'a': 0.9630901287553648,
'f1': 0.9570858283433133,
'p': 0.9609218436873748,
'r': 0.9532803180914513},
{'a': 0.9211159211159211,
'f1': 0.8790560471976402,
'p': 0.8989441930618401,
'r': 0.86002886002886},
{'a': 0.9058993847267462,
'f1': 0.9183417085427136,
'p': 0.8702380952380953,
'r': 0.9720744680851063},
{'a': 0.9181996086105675,
'f1': 0.9040844424047728,
'p': 0.8747779751332149,
'r': 0.9354226020892688},
{'a': 0.9591222030981067,
'f1': 0.9522373051784816,
'p': 0.958502024291498,
'r': 0.9460539460539461},
{'a': 0.9603463992707384,
'f1': 0.9502572898799314,
'p': 0.9432463110102156,
'r': 0.9573732718894009},
{'a': 0.9429404414827155,
'f1': 0.9302798982188295,
'p': 0.9185929648241206,
'r': 0.9422680412371134},
{'a': 0.920041004613019,
'f1': 0.8664383561643836,
'p': 0.840531561461794,
'r': 0.8939929328621908},
{'a': 0.9002638522427441,
'f1': 0.815968841285297,
'p': 0.7688073394495413,
'r': 0.8692946058091287},
{'a': 0.9523595505617978,
'f1': 0.9440928270042194,
'p': 0.9582441113490364,
'r': 0.9303534303534303},
{'a': 0.9564459930313589,
'f1': 0.9479166666666666,
'p': 0.9479166666666666,
'r': 0.9479166666666666},
{'a': 0.8827899298390425,
'f1': 0.8565656565656565,
'p': 0.7969924812030075,
'r': 0.925764192139738},
{'a': 0.9586449626044875,
'f1': 0.9499467518636849,
'p': 0.958109559613319,
'r': 0.941921858500528},
{'a': 0.9640317858636553,
'f1': 0.9595484477892757,
'p': 0.9622641509433962,
'r': 0.9568480300187617},
{'a': 0.9663256606990622,
'f1': 0.9613313754282917,
'p': 0.958984375,
'r': 0.9636898920510304},
{'a': 0.9699303263659699,
'f1': 0.9707142857142858,
'p': 0.9714081486776269,
'r': 0.9700214132762313},
{'a': 0.9461196243203164,
'f1': 0.920611798980335,
'p': 0.9390787518573551,
'r': 0.9028571428571428},
{'a': 0.948937908496732,
'f1': 0.943155979990905,
'p': 0.9308797127468582,
'r': 0.9557603686635945},
{'a': 0.9541052631578948,
'f1': 0.9472665699080792,
'p': 0.9616895874263262,
'r': 0.9332697807435653},
{'a': 0.9571852479864349,
'f1': 0.9505628976994616,
'p': 0.960435212660732,
'r': 0.9408914728682171},
{'a': 0.9658650116369278,
'f1': 0.9645732689210951,
'p': 0.9684721099434115,
'r': 0.9607056936647955}],
'www.esquire.com;2010': [{'a': 0.5643207012116525,
'f1': 0.4433465085638999,
'p': 0.30660592255125285,
'r': 0.8002378121284186},
{'a': 0.6595404595404596,
'f1': 0.711284310403253,
'p': 0.5828936406553735,
'r': 0.9122120817036071},
{'a': 0.4677680596047943,
'f1': 0.20742884708152434,
'p': 0.12092238470191226,
'r': 0.7288135593220338},
{'a': 0.5635330578512396,
'f1': 0.5412595005428882,
'p': 0.3940711462450593,
'r': 0.8639514731369151},
{'a': 0.5767780849459663,
'f1': 0.5699693564862105,
'p': 0.426279602750191,
'r': 0.8597842835130971},
{'a': 0.530896150113232,
'f1': 0.4304791830322074,
'p': 0.2860125260960334,
'r': 0.8698412698412699},
{'a': 0.4075716234652115,
'f1': 0.286652977412731,
'p': 0.1713303878252332,
'r': 0.8768844221105527},
{'a': 0.5689839572192513,
'f1': 0.5470213563132258,
'p': 0.3924731182795699,
'r': 0.9023485784919654},
{'a': 0.5669208519589798,
'f1': 0.5385261978145138,
'p': 0.3895419537900284,
'r': 0.8720508166969148},
{'a': 0.5872011251758087,
'f1': 0.6014935505770536,
'p': 0.4532742155525239,
'r': 0.8937457969065232},
{'a': 0.5289658906334597,
'f1': 0.45861854387056633,
'p': 0.3207136640557006,
'r': 0.8045851528384279},
{'a': 0.5566271700192891,
'f1': 0.5137503777576308,
'p': 0.3648068669527897,
'r': 0.8682328907048008},
{'a': 0.6042534531900899,
'f1': 0.6395046934291992,
'p': 0.49050245098039214,
'r': 0.9185312679288583},
{'a': 0.5003152585119798,
'f1': 0.31355565179731487,
'p': 0.19738276990185388,
'r': 0.7621052631578947},
{'a': 0.5922096657850445,
'f1': 0.5956127801621364,
'p': 0.45237232886635276,
'r': 0.8715980460572226},
{'a': 0.49784791965566716,
'f1': 0.2143658810325477,
'p': 0.12402597402597403,
'r': 0.7892561983471075},
{'a': 0.3755117231112765,
'f1': 0.17583497053045186,
'p': 0.0982436882546652,
'r': 0.8364485981308412},
{'a': 0.5544525547445256,
'f1': 0.5058290155440415,
'p': 0.35164340387212967,
'r': 0.9008073817762399},
{'a': 0.568724279835391,
'f1': 0.5549263873159682,
'p': 0.40312628547922663,
'r': 0.8900999091734787},
{'a': 0.6699975018735949,
'f1': 0.681763430498675,
'p': 0.5465430668211665,
'r': 0.9058898847631242},
{'a': 0.40389294403892945,
'f1': 0.26109435588108576,
'p': 0.15380710659898478,
'r': 0.8632478632478633},
{'a': 0.391304347826087,
'f1': 0.2546583850931677,
'p': 0.1490134994807892,
'r': 0.875},
{'a': 0.6918226600985221,
'f1': 0.7528445006321113,
'p': 0.629492600422833,
'r': 0.9363207547169812},
{'a': 0.5951573849878935,
'f1': 0.6032273374466066,
'p': 0.45785302593659943,
'r': 0.8838664812239221},
{'a': 0.630575117370892,
'f1': 0.6488145048814504,
'p': 0.5043365134431916,
'r': 0.9093041438623924}],
'www.esquire.com;2015': [{'a': 0.28155849110591824,
'f1': 0.01374795417348609,
'p': 0.006949500297835727,
'r': 0.6325301204819277},
{'a': 0.28711102754536055,
'f1': 0.03327383987761346,
'p': 0.017044341409260106,
'r': 0.696},
{'a': 0.27682545695615113,
'f1': 0.014328127016909773,
'p': 0.007239287810604579,
'r': 0.6894409937888198},
{'a': 0.46926977687626775,
'f1': 0.08112379280070238,
'p': 0.04307290695506247,
'r': 0.6957831325301205},
{'a': 0.3115534984047095,
'f1': 0.13151927437641722,
'p': 0.07138900855437258,
'r': 0.8339324227174695},
{'a': 0.3296091814111203,
'f1': 0.1882402484602832,
'p': 0.10571124512238382,
'r': 0.8583773403744599},
{'a': 0.2885415703320078,
'f1': 0.06056905605079986,
'p': 0.03155015584250366,
'r': 0.7549467275494672},
{'a': 0.28600444003589814,
'f1': 0.03177043300025621,
'p': 0.016255899318300997,
'r': 0.6966292134831461},
{'a': 0.3803981623277182,
'f1': 0.17495921696574226,
'p': 0.09766647694934548,
'r': 0.8387096774193549},
{'a': 0.480719397828233,
'f1': 0.5357822453876065,
'p': 0.37498552403011004,
'r': 0.9380069524913094},
{'a': 0.2905982905982906,
'f1': 0.05122118808170405,
'p': 0.026523482986156036,
'r': 0.7441016333938294},
{'a': 0.42642440556303274,
'f1': 0.22177133001927563,
'p': 0.12893712398254098,
'r': 0.7920289855072464},
{'a': 0.28823722302899707,
'f1': 0.04500314267756129,
'p': 0.023218107529671184,
'r': 0.7291242362525459},
{'a': 0.3103961736305388,
'f1': 0.11633420063602197,
'p': 0.06269084564092976,
'r': 0.8060897435897436},
{'a': 0.30111370823594114,
'f1': 0.08463893390959842,
'p': 0.04466265441875198,
'r': 0.8066361556064073},
{'a': 0.36065963688258146,
'f1': 0.0967741935483871,
'p': 0.05160673754629076,
'r': 0.7755834829443446},
{'a': 0.289927787677014,
'f1': 0.05231866825208085,
'p': 0.02706727967363854,
'r': 0.7798507462686567},
{'a': 0.28603752239087393,
'f1': 0.024475074069303104,
'p': 0.012450851900393184,
'r': 0.7142857142857143},
{'a': 0.3594932674687276,
'f1': 0.09215132693393563,
'p': 0.04895608351331893,
'r': 0.783109404990403},
{'a': 0.320497058048652,
'f1': 0.1641911963273022,
'p': 0.090838462917588,
'r': 0.8529741863075196},
{'a': 0.28706446607419944,
'f1': 0.023192887514495556,
'p': 0.011798636601992658,
'r': 0.6766917293233082},
{'a': 0.4193067197045035,
'f1': 0.3059682485779777,
'p': 0.18523848684210525,
'r': 0.878595806923452},
{'a': 0.33102908569192646,
'f1': 0.18479470198675496,
'p': 0.10366759793140344,
'r': 0.8499025341130604},
{'a': 0.32755466504050873,
'f1': 0.1720476241553148,
'p': 0.09607666966157533,
'r': 0.822142491030241},
{'a': 0.35135792460478316,
'f1': 0.058816609810610515,
'p': 0.03058103975535168,
'r': 0.7668711656441718},
{'a': 0.28490255928621744,
'f1': 0.034369055168040584,
'p': 0.01760998115537072,
'r': 0.7112860892388452}],
'www.forbes.com;2000': [{'a': 0.7603195739014648,
'f1': 0.7727272727272727,
'p': 0.6777408637873754,
'r': 0.8986784140969163},
{'a': 0.741304347826087,
'f1': 0.75564681724846,
'p': 0.6216216216216216,
'r': 0.9633507853403142},
{'a': 0.8787784356497351,
'f1': 0.9231073334651118,
'p': 0.8801356954391255,
'r': 0.9704904405652536},
{'a': 0.7747368421052632,
'f1': 0.7995003123048094,
'p': 0.6837606837606838,
'r': 0.9624060150375939},
{'a': 0.6669542709232097,
'f1': 0.5150753768844221,
'p': 0.36541889483065954,
'r': 0.8723404255319149},
{'a': 0.7552631578947369,
'f1': 0.757496740547588,
'p': 0.6579841449603624,
'r': 0.8924731182795699},
{'a': 0.8159443552701979,
'f1': 0.8610662358642972,
'p': 0.7781021897810219,
'r': 0.9638336347197106},
{'a': 0.717391304347826,
'f1': 0.6672550750220653,
'p': 0.5550660792951542,
'r': 0.8362831858407079},
{'a': 0.8219106957424714,
'f1': 0.8670027142303218,
'p': 0.7829131652661064,
'r': 0.9713292788879235},
{'a': 0.9711538461538461,
'f1': 0.9811202013845186,
'p': 0.9755944931163955,
'r': 0.9867088607594937},
{'a': 0.711376404494382,
'f1': 0.6888720666161998,
'p': 0.5963302752293578,
'r': 0.8154121863799283},
{'a': 0.880854252529037,
'f1': 0.9228155339805825,
'p': 0.8740229885057471,
'r': 0.9773778920308483},
{'a': 0.6704361873990307,
'f1': 0.5903614457831325,
'p': 0.49830508474576274,
'r': 0.7241379310344828},
{'a': 0.7071742313323572,
'f1': 0.661590524534687,
'p': 0.5634005763688761,
'r': 0.8012295081967213},
{'a': 0.6441837732160313,
'f1': 0.4468085106382979,
'p': 0.32450331125827814,
'r': 0.7170731707317073},
{'a': 0.7682789651293588,
'f1': 0.8140794223826715,
'p': 0.714172604908947,
'r': 0.9464847848898216},
{'a': 0.8058455114822547,
'f1': 0.8495145631067961,
'p': 0.7658643326039387,
'r': 0.9536784741144414},
{'a': 0.7821052631578947,
'f1': 0.8318440292445167,
'p': 0.7361610352264558,
'r': 0.9561157796451915},
{'a': 0.7606382978723404,
'f1': 0.7844598190526876,
'p': 0.7012369172216937,
'r': 0.8900966183574879},
{'a': 0.6845637583892618,
'f1': 0.36199095022624433,
'p': 0.23904382470119523,
'r': 0.7453416149068323},
{'a': 0.9127272727272727,
'f1': 0.84,
'p': 0.7777777777777778,
'r': 0.9130434782608695},
{'a': 0.7254335260115607,
'f1': 0.6900489396411094,
'p': 0.5834482758620689,
'r': 0.844311377245509},
{'a': 0.8086194302410519,
'f1': 0.8335451080050825,
'p': 0.7446083995459705,
'r': 0.9466089466089466},
{'a': 0.7429359062715368,
'f1': 0.7534699272967614,
'p': 0.641169853768279,
'r': 0.9134615384615384},
{'a': 0.8815298507462687,
'f1': 0.9247778874629812,
'p': 0.8794592564776568,
'r': 0.9750208159866778},
{'a': 0.7494922139471902,
'f1': 0.7787081339712919,
'p': 0.6636085626911316,
'r': 0.9421128798842258},
{'a': 0.733142037302726,
'f1': 0.7138461538461538,
'p': 0.5895806861499364,
'r': 0.9044834307992202},
{'a': 0.9517426273458445,
'f1': 0.9647058823529412,
'p': 0.9669811320754716,
'r': 0.9624413145539906}],
'www.forbes.com;2005': [{'a': 0.922463768115942,
'f1': 0.5868725868725868,
'p': 0.4935064935064935,
'r': 0.7238095238095238},
{'a': 0.893611404435058,
'f1': 0.8388644542183126,
'p': 0.8026013771996939,
'r': 0.8785594639865997},
{'a': 0.923974540311174,
'f1': 0.7174770039421814,
'p': 0.6807980049875312,
'r': 0.7583333333333333},
{'a': 0.8789716926632004,
'f1': 0.7067879636109169,
'p': 0.6242274412855378,
'r': 0.8145161290322581},
{'a': 0.9320754716981132,
'f1': 0.8411764705882353,
'p': 0.8125,
'r': 0.8719512195121951},
{'a': 0.9328429804924848,
'f1': 0.8467153284671532,
'p': 0.8215297450424929,
'r': 0.8734939759036144},
{'a': 0.9169241331960178,
'f1': 0.6620111731843575,
'p': 0.5895522388059702,
'r': 0.7547770700636943},
{'a': 0.9158091674462114,
'f1': 0.7926267281105991,
'p': 0.7510917030567685,
'r': 0.8390243902439024},
{'a': 0.9332394366197183,
'f1': 0.8892005610098177,
'p': 0.879740980573543,
'r': 0.8988657844990549},
{'a': 0.8787878787878788,
'f1': 0.0125,
'p': 0.0078125,
'r': 0.03125},
{'a': 0.9221065909807632,
'f1': 0.6535764375876578,
'p': 0.6348773841961853,
'r': 0.6734104046242775},
{'a': 0.9324227174694465,
'f1': 0.7062499999999999,
'p': 0.6420454545454546,
'r': 0.7847222222222222},
{'a': 0.9324990519529769,
'f1': 0.5972850678733032,
'p': 0.5116279069767442,
'r': 0.717391304347826},
{'a': 0.897003745318352,
'f1': 0.7887323943661972,
'p': 0.7368421052631579,
'r': 0.8484848484848485},
{'a': 0.923697270471464,
'f1': 0.8282122905027932,
'p': 0.8179310344827586,
'r': 0.8387553041018387},
{'a': 0.9231056364315043,
'f1': 0.6199261992619928,
'p': 0.56,
'r': 0.6942148760330579},
{'a': 0.9271175311884439,
'f1': 0.8388969521044993,
'p': 0.8210227272727273,
'r': 0.857566765578635},
{'a': 0.9274785801713586,
'f1': 0.8657223796033995,
'p': 0.854586129753915,
'r': 0.8771526980482205},
{'a': 0.9183731513083049,
'f1': 0.8444444444444444,
'p': 0.8358369098712446,
'r': 0.8532311062431545},
{'a': 0.9288114879315612,
'f1': 0.8535512256442489,
'p': 0.8382716049382716,
'r': 0.8693982074263764},
{'a': 0.8757146408153119,
'f1': 0.7795414462081128,
'p': 0.7163695299837926,
'r': 0.8549323017408124},
{'a': 0.918732350172576,
'f1': 0.778063410454156,
'p': 0.7566666666666667,
'r': 0.800705467372134},
{'a': 0.918977202711029,
'f1': 0.8101083032490974,
'p': 0.7912552891396333,
'r': 0.8298816568047337},
{'a': 0.926164136866478,
'f1': 0.8929503916449086,
'p': 0.9047619047619048,
'r': 0.8814432989690721},
{'a': 0.9195926025194318,
'f1': 0.8584905660377359,
'p': 0.8363970588235294,
'r': 0.8817829457364341}],
'www.forbes.com;2010': [{'a': 0.803030303030303,
'f1': 0.4956896551724137,
'p': 0.4121863799283154,
'r': 0.6216216216216216},
{'a': 0.8355041003376749,
'f1': 0.8311045071817731,
'p': 0.8121974830590513,
'r': 0.8509127789046653},
{'a': 0.834983498349835,
'f1': 0.7706422018348623,
'p': 0.6666666666666666,
'r': 0.9130434782608695},
{'a': 0.8385175380542687,
'f1': 0.7881944444444444,
'p': 0.6941896024464832,
'r': 0.9116465863453815},
{'a': 0.8763654419066534,
'f1': 0.8819345661450925,
'p': 0.8355795148247979,
'r': 0.9337349397590361},
{'a': 0.8413705583756346,
'f1': 0.7990353697749196,
'p': 0.7213352685050798,
'r': 0.8954954954954955},
{'a': 0.8154583582983823,
'f1': 0.7220216606498195,
'p': 0.6279434850863422,
'r': 0.8492569002123143},
{'a': 0.8404907975460123,
'f1': 0.8281249999999999,
'p': 0.7291005291005291,
'r': 0.9582753824756607},
{'a': 0.8345111896348646,
'f1': 0.8068728522336769,
'p': 0.7264851485148515,
'r': 0.9072642967542504},
{'a': 0.8261648745519713,
'f1': 0.742249778565102,
'p': 0.6506211180124224,
'r': 0.8639175257731959},
{'a': 0.8678511937812327,
'f1': 0.8598351001177856,
'p': 0.7883369330453563,
'r': 0.9455958549222798},
{'a': 0.8579756226815051,
'f1': 0.8538713195201745,
'p': 0.7997957099080695,
'r': 0.9157894736842105},
{'a': 0.8235294117647058,
'f1': 0.7715481171548119,
'p': 0.6749633967789166,
'r': 0.900390625},
{'a': 0.8467322151532678,
'f1': 0.8222669349429913,
'p': 0.7512254901960784,
'r': 0.9081481481481481},
{'a': 0.822998193859121,
'f1': 0.7762557077625571,
'p': 0.6737120211360634,
'r': 0.9156193895870736},
{'a': 0.8538390379278445,
'f1': 0.8438735177865613,
'p': 0.7870967741935484,
'r': 0.9094781682641108},
{'a': 0.7989521938441388,
'f1': 0.7039537126325941,
'p': 0.6684981684981685,
'r': 0.7433808553971487},
{'a': 0.8128772635814889,
'f1': 0.7360454115421002,
'p': 0.7021660649819494,
'r': 0.7733598409542743},
{'a': 0.8265830005704506,
'f1': 0.7342657342657343,
'p': 0.6552262090483619,
'r': 0.8349900596421471},
{'a': 0.8058429701765064,
'f1': 0.7498039215686273,
'p': 0.6297760210803689,
'r': 0.9263565891472868},
{'a': 0.8155028827674567,
'f1': 0.7587939698492463,
'p': 0.7365853658536585,
'r': 0.7823834196891192},
{'a': 0.5447540011855364,
'f1': 0.1812366737739872,
'p': 0.10278113663845223,
'r': 0.7657657657657657},
{'a': 0.5952802359882006,
'f1': 0.3466666666666667,
'p': 0.21487603305785125,
'r': 0.896551724137931},
{'a': 0.818087318087318,
'f1': 0.8064159292035399,
'p': 0.7098344693281402,
'r': 0.9334186939820742},
{'a': 0.8540609137055838,
'f1': 0.8275862068965517,
'p': 0.739946380697051,
'r': 0.9387755102040817}],
'www.forbes.com;2015': [{'a': 0.6009918845807033,
'f1': 0.5982750794371311,
'p': 0.45354439091534754,
'r': 0.8786666666666667},
{'a': 0.5228988424760946,
'f1': 0.4397163120567376,
'p': 0.30194805194805197,
'r': 0.808695652173913},
{'a': 0.5684647302904564,
'f1': 0.4474616292798111,
'p': 0.31530782029950083,
'r': 0.7703252032520326},
{'a': 0.5637982195845698,
'f1': 0.486013986013986,
'p': 0.347789824854045,
'r': 0.8065764023210832},
{'a': 0.5737037037037037,
'f1': 0.5218113834648941,
'p': 0.38456827924066134,
'r': 0.8113695090439277},
{'a': 0.5261813537675607,
'f1': 0.5360566902876198,
'p': 0.3793510324483776,
'r': 0.9133522727272727},
{'a': 0.6267262388302194,
'f1': 0.6455842653297338,
'p': 0.5097442143727162,
'r': 0.8801261829652997},
{'a': 0.6165389527458492,
'f1': 0.5712245626561941,
'p': 0.45454545454545453,
'r': 0.7684918347742555},
{'a': 0.43635551585529253,
'f1': 0.3984747378455672,
'p': 0.2570725707257073,
'r': 0.885593220338983},
{'a': 0.3153623188405797,
'f1': 0.2385557704706641,
'p': 0.13941220798794274,
'r': 0.8258928571428571},
{'a': 0.5164212910532276,
'f1': 0.3200636942675159,
'p': 0.20447609359104782,
'r': 0.7362637362637363},
{'a': 0.56483191725157,
'f1': 0.602832097100472,
'p': 0.4554253693326541,
'r': 0.8913260219341974},
{'a': 0.608122179798681,
'f1': 0.6417010472865756,
'p': 0.5017369727047146,
'r': 0.8899647887323944},
{'a': 0.5861520095503382,
'f1': 0.607250755287009,
'p': 0.4612736660929432,
'r': 0.8883977900552487},
{'a': 0.5475171232876712,
'f1': 0.5086006508600651,
'p': 0.36321381142098275,
'r': 0.8480620155038759},
{'a': 0.5645315487571702,
'f1': 0.48791455874086564,
'p': 0.3472,
'r': 0.8204158790170132},
{'a': 0.5972944849115505,
'f1': 0.5044814340588989,
'p': 0.3835171966255678,
'r': 0.7369077306733167},
{'a': 0.5224932249322494,
'f1': 0.36297903109182933,
'p': 0.23882017126546146,
'r': 0.7560240963855421},
{'a': 0.49003466204506063,
'f1': 0.46862302483069984,
'p': 0.31860036832412525,
'r': 0.8856655290102389},
{'a': 0.5722679200940071,
'f1': 0.5997067448680351,
'p': 0.4487109160724081,
'r': 0.9038674033149171},
{'a': 0.41139240506329117,
'f1': 0.26235509456985967,
'p': 0.15774027879677183,
'r': 0.7789855072463768},
{'a': 0.5893101873001371,
'f1': 0.5466464952092789,
'p': 0.40813253012048195,
'r': 0.8274809160305343},
{'a': 0.36214185063410054,
'f1': 0.09345794392523364,
'p': 0.050468637346791634,
'r': 0.6306306306306306},
{'a': 0.5615592435353145,
'f1': 0.5968772178850248,
'p': 0.44263157894736843,
'r': 0.9161220043572985},
{'a': 0.5688073394495413,
'f1': 0.531405782652044,
'p': 0.3867924528301887,
'r': 0.8487261146496815}],
'www.foxnews.com;2000': [{'a': 0.9518828451882845,
'f1': 0.9187279151943463,
'p': 0.9285714285714286,
'r': 0.9090909090909091},
{'a': 0.960635359116022,
'f1': 0.8800000000000001,
'p': 0.9330357142857143,
'r': 0.8326693227091634},
{'a': 0.9295958279009127,
'f1': 0.9021739130434783,
'p': 0.8691099476439791,
'r': 0.9378531073446328},
{'a': 0.9138438880706922,
'f1': 0.8956289027653881,
'p': 0.899641577060932,
'r': 0.8916518650088809},
{'a': 0.9568106312292359,
'f1': 0.8987012987012987,
'p': 0.9301075268817204,
'r': 0.8693467336683417},
{'a': 0.9374437443744374,
'f1': 0.9506567270145545,
'p': 0.926002766251729,
'r': 0.9766593727206418},
{'a': 0.7709205020920502,
'f1': 0.7818725099601594,
'p': 0.6618887015177066,
'r': 0.9549878345498783},
{'a': 0.9540372670807453,
'f1': 0.9357638888888891,
'p': 0.9373913043478261,
'r': 0.9341421143847487},
{'a': 0.9479315263908702,
'f1': 0.9557575757575759,
'p': 0.9651162790697675,
'r': 0.946578631452581},
{'a': 0.9376609994848016,
'f1': 0.8826382153249273,
'p': 0.9191919191919192,
'r': 0.8488805970149254},
{'a': 0.966804979253112,
'f1': 0.9065420560747663,
'p': 0.9326923076923077,
'r': 0.8818181818181818},
{'a': 0.8981636060100167,
'f1': 0.8539505187549881,
'p': 0.816793893129771,
'r': 0.8946488294314381},
{'a': 0.9306184012066365,
'f1': 0.8696883852691218,
'p': 0.8319783197831978,
'r': 0.9109792284866469},
{'a': 0.9382022471910112,
'f1': 0.9022222222222221,
'p': 0.8638297872340426,
'r': 0.9441860465116279},
{'a': 0.2631578947368421,
'f1': 0.3,
'p': 0.17647058823529413,
'r': 1.0},
{'a': 0.9135060129509713,
'f1': 0.9034589571502323,
'p': 0.8901322482197355,
'r': 0.9171907756813418},
{'a': 0.9339651482726995,
'f1': 0.9274680993955676,
'p': 0.933739012846518,
'r': 0.9212808539026017},
{'a': 0.9387078961899503,
'f1': 0.9285254346426272,
'p': 0.9327296248382924,
'r': 0.9243589743589744},
{'a': 0.933705512909979,
'f1': 0.8966267682263331,
'p': 0.865546218487395,
'r': 0.9300225733634312},
{'a': 0.9861636951882701,
'f1': 0.9925436806766332,
'p': 0.9959797498511018,
'r': 0.9891312384473198},
{'a': 0.9393139841688655,
'f1': 0.936986301369863,
'p': 0.9173819742489271,
'r': 0.9574468085106383},
{'a': 0.8996683250414593,
'f1': 0.9202373104812129,
'p': 0.8914431673052363,
'r': 0.9509536784741145},
{'a': 0.9659790083242852,
'f1': 0.9745533297238765,
'p': 0.9787928221859706,
'r': 0.9703504043126685},
{'a': 0.949358059914408,
'f1': 0.9095541401273886,
'p': 0.9037974683544304,
'r': 0.9153846153846154},
{'a': 0.9705240174672489,
'f1': 0.9608695652173913,
'p': 0.9822222222222222,
'r': 0.9404255319148936}],
'www.foxnews.com;2005': [{'a': 0.6034149484536082,
'f1': 0.6691749529696318,
'p': 0.5075417855686915,
'r': 0.9818611987381703},
{'a': 0.442833607907743,
'f1': 0.45749117741418033,
'p': 0.30135249366018596,
'r': 0.9494007989347537},
{'a': 0.458528951486698,
'f1': 0.4917743830787309,
'p': 0.33466613354658137,
'r': 0.9269102990033222},
{'a': 0.4705693148922483,
'f1': 0.5036188178528348,
'p': 0.3471933471933472,
'r': 0.9165751920965971},
{'a': 0.4147383410466358,
'f1': 0.3946980854197349,
'p': 0.24907063197026022,
'r': 0.950354609929078},
{'a': 0.3608128834355828,
'f1': 0.2672527472527472,
'p': 0.15565796210957503,
'r': 0.9440993788819876},
{'a': 0.551033386327504,
'f1': 0.6097291321171918,
'p': 0.44565656565656564,
'r': 0.9650043744531933},
{'a': 0.599594868332208,
'f1': 0.3762272089761571,
'p': 0.24183006535947713,
'r': 0.8468823993685872},
{'a': 0.7557954127315099,
'f1': 0.8411140371877743,
'p': 0.7352120535714286,
'r': 0.9826589595375722},
{'a': 0.5216294160057678,
'f1': 0.30194634402945814,
'p': 0.1810725552050473,
'r': 0.9082278481012658},
{'a': 0.5412064570943076,
'f1': 0.55,
'p': 0.38573933372296904,
'r': 0.9579100145137881},
{'a': 0.45275779376498804,
'f1': 0.3491158014831717,
'p': 0.21549295774647886,
'r': 0.918918918918919},
{'a': 0.46584641493423845,
'f1': 0.4211494252873563,
'p': 0.27213309566250743,
'r': 0.9308943089430894},
{'a': 0.3951965065502183,
'f1': 0.29228410832907514,
'p': 0.17470983506414173,
'r': 0.89375},
{'a': 0.47703180212014135,
'f1': 0.5151876116736153,
'p': 0.35262943334692215,
'r': 0.9558011049723757},
{'a': 0.456710653363373,
'f1': 0.48661800486618007,
'p': 0.3270645952575634,
'r': 0.9501187648456056},
{'a': 0.46838258659040355,
'f1': 0.5013412816691505,
'p': 0.33966074313408723,
'r': 0.9567690557451649},
{'a': 0.6817651632970451,
'f1': 0.7405294024409574,
'p': 0.5974424552429668,
'r': 0.9737390579408086},
{'a': 0.39481946624803765,
'f1': 0.3248686514886165,
'p': 0.19619249074563722,
'r': 0.9440203562340967},
{'a': 0.40449775112443775,
'f1': 0.41895845523698066,
'p': 0.26957831325301207,
'r': 0.9396325459317585},
{'a': 0.6685860524632118,
'f1': 0.7033218785796106,
'p': 0.5576748410535877,
'r': 0.951937984496124},
{'a': 0.46245186136071886,
'f1': 0.491962390051562,
'p': 0.33183306055646483,
'r': 0.9507620164126612},
{'a': 0.401333737496211,
'f1': 0.408859622867405,
'p': 0.2614854517611026,
'r': 0.9368998628257887},
{'a': 0.5186202686202687,
'f1': 0.5137218624730188,
'p': 0.35192226446979297,
'r': 0.9509132420091324},
{'a': 0.46610716591349255,
'f1': 0.4969586374695864,
'p': 0.3362139917695473,
'r': 0.9522144522144522}],
'www.foxnews.com;2010': [{'a': 0.7668161434977578,
'f1': 0.3764988009592326,
'p': 0.24881141045958796,
'r': 0.7733990147783252},
{'a': 0.7757424368581738,
'f1': 0.6937073540561032,
'p': 0.5988219895287958,
'r': 0.8243243243243243},
{'a': 0.2345960748516659,
'f1': 0.17103311913000496,
'p': 0.09495060373216246,
'r': 0.8606965174129353},
{'a': 0.7676311030741411,
'f1': 0.32189973614775724,
'p': 0.20854700854700856,
'r': 0.7052023121387283},
{'a': 0.7303617099013519,
'f1': 0.5472392638036809,
'p': 0.40879926672777267,
'r': 0.8274582560296846},
{'a': 0.7756373937677054,
'f1': 0.6793522267206478,
'p': 0.5883590462833099,
'r': 0.803639846743295},
{'a': 0.7619047619047619,
'f1': 0.5780474351902922,
'p': 0.45565217391304347,
'r': 0.7903469079939668},
{'a': 0.7622270070747462,
'f1': 0.6082108464267614,
'p': 0.5050505050505051,
'r': 0.7643312101910829},
{'a': 0.7117411850236278,
'f1': 0.507147296457427,
'p': 0.36298932384341637,
'r': 0.8412371134020619},
{'a': 0.7625284738041003,
'f1': 0.6472081218274112,
'p': 0.5379746835443038,
'r': 0.8121019108280255},
{'a': 0.7522093813732155,
'f1': 0.4774193548387096,
'p': 0.3501577287066246,
'r': 0.75},
{'a': 0.760662671836447,
'f1': 0.4270042194092827,
'p': 0.3092909535452323,
'r': 0.6893732970027248},
{'a': 0.749707145646232,
'f1': 0.4784377542717656,
'p': 0.35336538461538464,
'r': 0.7405541561712846},
{'a': 0.8027233477250083,
'f1': 0.7522935779816513,
'p': 0.6424501424501424,
'r': 0.9074446680080482},
{'a': 0.7579972183588317,
'f1': 0.6481294236602629,
'p': 0.5136217948717948,
'r': 0.8780821917808219},
{'a': 0.7645959831854273,
'f1': 0.3471502590673575,
'p': 0.22521008403361345,
'r': 0.7570621468926554},
{'a': 0.7599640395564878,
'f1': 0.607545320921117,
'p': 0.5,
'r': 0.7740324594257179},
{'a': 0.811549368393916,
'f1': 0.8121305576972501,
'p': 0.7301293900184843,
'r': 0.9148812970469021},
{'a': 0.7687253613666228,
'f1': 0.38028169014084506,
'p': 0.2583732057416268,
'r': 0.72},
{'a': 0.7672496025437202,
'f1': 0.7031630170316301,
'p': 0.5776149233844103,
'r': 0.8984455958549222},
{'a': 0.7780074410913601,
'f1': 0.5251989389920424,
'p': 0.3907894736842105,
'r': 0.8005390835579514},
{'a': 0.7532252729077076,
'f1': 0.5155844155844156,
'p': 0.3899803536345776,
'r': 0.7605363984674329},
{'a': 0.7881653607133208,
'f1': 0.7731481481481481,
'p': 0.6816326530612244,
'r': 0.893048128342246},
{'a': 0.7628689087165408,
'f1': 0.46805234795996925,
'p': 0.3370288248337029,
'r': 0.7657430730478589},
{'a': 0.7885294117647059,
'f1': 0.7472759226713532,
'p': 0.6545566502463054,
'r': 0.8705978705978706}],
'www.foxnews.com;2015': [{'a': 0.8563569682151589,
'f1': 0.8865282472235635,
'p': 0.8652214891611687,
'r': 0.9089108910891089},
{'a': 0.8018691588785046,
'f1': 0.7188328912466844,
'p': 0.6878172588832487,
'r': 0.7527777777777778},
{'a': 0.8519888991674376,
'f1': 0.6444444444444445,
'p': 0.6223175965665236,
'r': 0.6682027649769585},
{'a': 0.7843719090009891,
'f1': 0.6812865497076024,
'p': 0.589873417721519,
'r': 0.8062283737024222},
{'a': 0.8609422492401215,
'f1': 0.8042780748663102,
'p': 0.7752577319587629,
'r': 0.8355555555555556},
{'a': 0.8248520710059172,
'f1': 0.672566371681416,
'p': 0.5984251968503937,
'r': 0.7676767676767676},
{'a': 0.826963906581741,
'f1': 0.7433070866141732,
'p': 0.6685552407932012,
'r': 0.8368794326241135},
{'a': 0.8463476070528967,
'f1': 0.7328467153284672,
'p': 0.7011173184357542,
'r': 0.7675840978593272},
{'a': 0.8742857142857143,
'f1': 0.8811524609843938,
'p': 0.8706998813760379,
'r': 0.8918590522478737},
{'a': 0.8710010319917441,
'f1': 0.5954692556634303,
'p': 0.5227272727272727,
'r': 0.6917293233082706},
{'a': 0.8536170212765958,
'f1': 0.8093126385809313,
'p': 0.8039647577092511,
'r': 0.8147321428571429},
{'a': 0.8363201911589009,
'f1': 0.766609880749574,
'p': 0.7009345794392523,
'r': 0.8458646616541353},
{'a': 0.8519900497512438,
'f1': 0.8344923504867872,
'p': 0.8075370121130552,
'r': 0.8633093525179856},
{'a': 0.8399339933993399,
'f1': 0.8283185840707965,
'p': 0.7878787878787878,
'r': 0.8731343283582089},
{'a': 0.8649334178820546,
'f1': 0.8924785461887934,
'p': 0.8700787401574803,
'r': 0.9160621761658031},
{'a': 0.8697394789579158,
'f1': 0.8959167333867094,
'p': 0.8952,
'r': 0.8966346153846154},
{'a': 0.7922077922077922,
'f1': 0.6363636363636364,
'p': 0.5645161290322581,
'r': 0.7291666666666666},
{'a': 0.8699234844025897,
'f1': 0.9002257336343116,
'p': 0.8815207780725022,
'r': 0.9197416974169742},
{'a': 0.853763440860215,
'f1': 0.84012539184953,
'p': 0.8271604938271605,
'r': 0.8535031847133758},
{'a': 0.8404864091559371,
'f1': 0.8631062001227747,
'p': 0.8155452436194895,
'r': 0.9165580182529335},
{'a': 0.8048780487804879,
'f1': 0.8079999999999998,
'p': 0.7917133258678611,
'r': 0.8249708284714119},
{'a': 0.8483572030328559,
'f1': 0.8369565217391305,
'p': 0.8048780487804879,
'r': 0.8716981132075472},
{'a': 0.8672086720867209,
'f1': 0.8122605363984674,
'p': 0.8153846153846154,
'r': 0.8091603053435115},
{'a': 0.8849701573521432,
'f1': 0.8921668362156663,
'p': 0.8921668362156663,
'r': 0.8921668362156663},
{'a': 0.7781094527363184,
'f1': 0.6836879432624113,
'p': 0.5863746958637469,
'r': 0.8197278911564626},
{'a': 0.8762641284949435,
'f1': 0.8725490196078431,
'p': 0.8651275820170109,
'r': 0.8800988875154512}],
'www.latimes.com;2000': [{'a': 0.8859138533178114,
'f1': 0.9079812206572769,
'p': 0.8719567177637512,
'r': 0.9471106758080313},
{'a': 0.9060402684563759,
'f1': 0.9296754250386399,
'p': 0.9011235955056179,
'r': 0.960095770151636},
{'a': 0.8181818181818182,
'f1': 0.7275541795665633,
'p': 0.6167979002624672,
'r': 0.8867924528301887},
{'a': 0.8636176349402555,
'f1': 0.8996665656259473,
'p': 0.8436611711199545,
'r': 0.9636363636363636},
{'a': 0.9081803005008348,
'f1': 0.9397590361445783,
'p': 0.9081287044877223,
'r': 0.9736722650930549},
{'a': 0.8790149892933619,
'f1': 0.9011373578302712,
'p': 0.865546218487395,
'r': 0.9397810218978102},
{'a': 0.8671875,
'f1': 0.8919262555626193,
'p': 0.8565323565323565,
'r': 0.9303713527851459},
{'a': 0.8974439886399496,
'f1': 0.8914132976946207,
'p': 0.8567758509955041,
'r': 0.9289693593314763},
{'a': 0.906754772393539,
'f1': 0.9151069518716578,
'p': 0.8912760416666666,
'r': 0.9402472527472527},
{'a': 0.864039408866995,
'f1': 0.8217054263565892,
'p': 0.7718446601941747,
'r': 0.8784530386740331},
{'a': 0.95949263502455,
'f1': 0.9440993788819876,
'p': 0.9675925925925926,
'r': 0.9217199558985667},
{'a': 0.8910433979686058,
'f1': 0.9293695131683959,
'p': 0.8885921404044258,
'r': 0.9740694270179842},
{'a': 0.8336025848142165,
'f1': 0.851227732306211,
'p': 0.7864768683274022,
'r': 0.9275970619097587},
{'a': 0.8811685748124753,
'f1': 0.9156153630501823,
'p': 0.8850948509485095,
'r': 0.9483159117305459},
{'a': 0.879980563654033,
'f1': 0.9096892138939672,
'p': 0.8723702664796634,
'r': 0.9503437738731857},
{'a': 0.8948170731707317,
'f1': 0.9210827296988181,
'p': 0.884981684981685,
'r': 0.9602543720190779},
{'a': 0.8715647784632642,
'f1': 0.9174477289113193,
'p': 0.8724717175179979,
'r': 0.9673128088179399},
{'a': 0.8863366336633663,
'f1': 0.9081011847582452,
'p': 0.8807453416149068,
'r': 0.9372108393919365},
{'a': 0.8540250447227191,
'f1': 0.9017341040462428,
'p': 0.8482102401449932,
'r': 0.9624678663239075},
{'a': 0.8820047355958959,
'f1': 0.9161290322580645,
'p': 0.8850948509485095,
'r': 0.9494186046511628},
{'a': 0.875943000838223,
'f1': 0.9118522930315663,
'p': 0.870380898237635,
'r': 0.957473420888055},
{'a': 0.8548465660009742,
'f1': 0.8876319758672699,
'p': 0.8306280875088214,
'r': 0.9530364372469635},
{'a': 0.867056856187291,
'f1': 0.8451801363193768,
'p': 0.8097014925373134,
'r': 0.8839103869653768},
{'a': 0.8775137111517367,
'f1': 0.9103678929765886,
'p': 0.8635786802030457,
'r': 0.9625176803394625},
{'a': 0.9000886786875554,
'f1': 0.9353481254781943,
'p': 0.9015486725663717,
'r': 0.9717806041335453}],
'www.latimes.com;2005': [{'a': 0.7422196124486201,
'f1': 0.7298461538461539,
'p': 0.6161038961038962,
'r': 0.8950943396226415},
{'a': 0.6051838456901748,
'f1': 0.4910644910644911,
'p': 0.3484013230429989,
'r': 0.8315789473684211},
{'a': 0.7498252969951084,
'f1': 0.7250384024577572,
'p': 0.6059050064184852,
'r': 0.9024856596558317},
{'a': 0.7478488589599701,
'f1': 0.7092320966350302,
'p': 0.5796897038081805,
'r': 0.9133333333333333},
{'a': 0.779495990836197,
'f1': 0.7187728268809349,
'p': 0.5992691839220463,
'r': 0.8978102189781022},
{'a': 0.9032732622287606,
'f1': 0.927488282326992,
'p': 0.9241758241758242,
'r': 0.9308245711123408},
{'a': 0.8685015290519877,
'f1': 0.7754569190600522,
'p': 0.7156626506024096,
'r': 0.8461538461538461},
{'a': 0.7058096415327565,
'f1': 0.7202194357366772,
'p': 0.6038107752956636,
'r': 0.8922330097087379},
{'a': 0.7935819601040763,
'f1': 0.8212318477716575,
'p': 0.7263064658990257,
'r': 0.9447004608294931},
{'a': 0.7500845451471085,
'f1': 0.8024592354985296,
'p': 0.7117117117117117,
'r': 0.9197303921568627},
{'a': 0.7272727272727273,
'f1': 0.7387698686938493,
'p': 0.619351100811124,
'r': 0.9152397260273972},
{'a': 0.8605957446808511,
'f1': 0.9013134112543679,
'p': 0.8558352402745996,
'r': 0.9518961567828964},
{'a': 0.7504501260352899,
'f1': 0.7099204688154039,
'p': 0.5812200137080192,
'r': 0.9118279569892473},
{'a': 0.7527058051820269,
'f1': 0.7320540156361051,
'p': 0.6311274509803921,
'r': 0.871404399323181},
{'a': 0.7738570113531759,
'f1': 0.7661059980958426,
'p': 0.6675884955752213,
'r': 0.8987341772151899},
{'a': 0.7750533049040512,
'f1': 0.7287917737789202,
'p': 0.6169749727965179,
'r': 0.8901098901098901},
{'a': 0.8182175107970161,
'f1': 0.84127528282482,
'p': 0.7649625935162094,
'r': 0.9345011424219345},
{'a': 0.7622868605817452,
'f1': 0.7208480565371025,
'p': 0.6169354838709677,
'r': 0.8668555240793201},
{'a': 0.6897179253867152,
'f1': 0.6960784313725491,
'p': 0.5606604450825556,
'r': 0.917743830787309},
{'a': 0.6222222222222222,
'f1': 0.3751178133836004,
'p': 0.24968632371392724,
'r': 0.7537878787878788},
{'a': 0.6855524079320113,
'f1': 0.6961678832116787,
'p': 0.5593841642228738,
'r': 0.9214975845410628},
{'a': 0.8013661202185792,
'f1': 0.851844304055431,
'p': 0.7766629505759941,
'r': 0.9431407942238267},
{'a': 0.9086802194256212,
'f1': 0.9246739419749801,
'p': 0.9234449760765551,
'r': 0.92590618336887},
{'a': 0.732839313572543,
'f1': 0.7519014849692142,
'p': 0.6306196840826246,
'r': 0.9309417040358744},
{'a': 0.6179577464788732,
'f1': 0.354806739345887,
'p': 0.2315653298835705,
'r': 0.7584745762711864}],
'www.latimes.com;2010': [{'a': 0.5850843444806155,
'f1': 0.49166062364031904,
'p': 0.3570300157977883,
'r': 0.789289871944121},
{'a': 0.49572649572649574,
'f1': 0.41124886604172967,
'p': 0.27287319422150885,
'r': 0.8343558282208589},
{'a': 0.6794190577399929,
'f1': 0.3728343728343728,
'p': 0.2642436149312377,
'r': 0.6329411764705882},
{'a': 0.7071513002364066,
'f1': 0.47704485488126647,
'p': 0.3772954924874791,
'r': 0.648493543758967},
{'a': 0.8660617059891107,
'f1': 0.8762990278243379,
'p': 0.8378205128205128,
'r': 0.9184820801124385},
{'a': 0.8609794628751974,
'f1': 0.8613081166272655,
'p': 0.813849590469099,
'r': 0.9146443514644351},
{'a': 0.5482108713466266,
'f1': 0.4580602883355177,
'p': 0.3210840606338999,
'r': 0.7988571428571428},
{'a': 0.7263668192835981,
'f1': 0.49653121902874137,
'p': 0.3691967575534267,
'r': 0.7579425113464447},
{'a': 0.8486257928118394,
'f1': 0.8380090497737557,
'p': 0.7860780984719864,
'r': 0.8972868217054264},
{'a': 0.8374751491053678,
'f1': 0.7872478854912167,
'p': 0.7092614302461899,
'r': 0.8845029239766082},
{'a': 0.5851926977687627,
'f1': 0.1452455590386625,
'p': 0.08128654970760234,
'r': 0.6813725490196079},
{'a': 0.8460222412318221,
'f1': 0.8331788693234478,
'p': 0.779705117085863,
'r': 0.8945273631840795},
{'a': 0.7058642922935217,
'f1': 0.5801928133216476,
'p': 0.4652143359100492,
'r': 0.770663562281723},
{'a': 0.574838388861263,
'f1': 0.5037724898432966,
'p': 0.36717428087986465,
'r': 0.8022181146025879},
{'a': 0.63409915356711,
'f1': 0.41013645224171547,
'p': 0.2743870631194575,
'r': 0.8117283950617284},
{'a': 0.5813497619714366,
'f1': 0.27532719340765877,
'p': 0.1658878504672897,
'r': 0.8091168091168092},
{'a': 0.7512280701754386,
'f1': 0.5599006828057107,
'p': 0.41566820276497696,
'r': 0.8574144486692015},
{'a': 0.5737658674188999,
'f1': 0.5066927848514529,
'p': 0.3653483992467043,
'r': 0.8264110756123536},
{'a': 0.44818136522172397,
'f1': 0.33860853986264555,
'p': 0.21283783783783783,
'r': 0.8277372262773722},
{'a': 0.5393258426966292,
'f1': 0.43914415994387934,
'p': 0.30023980815347723,
'r': 0.8172323759791122},
{'a': 0.8528493364558938,
'f1': 0.8547206165703276,
'p': 0.811265544989027,
'r': 0.9030944625407166},
{'a': 0.5451306413301663,
'f1': 0.44046749452154854,
'p': 0.3015,
'r': 0.8170731707317073},
{'a': 0.6951649055395454,
'f1': 0.5310344827586208,
'p': 0.41945525291828795,
'r': 0.723489932885906},
{'a': 0.8481414324569356,
'f1': 0.8239621650026274,
'p': 0.7574879227053141,
'r': 0.9032258064516129},
{'a': 0.7052851597491788,
'f1': 0.2595648912228057,
'p': 0.1610800744878957,
'r': 0.667953667953668}],
'www.latimes.com;2015': [{'a': 0.5938778389053463,
'f1': 0.1715107913669065,
'p': 0.09600515463917526,
'r': 0.8032345013477089},
{'a': 0.63568345323741,
'f1': 0.18637532133676094,
'p': 0.10681399631675875,
'r': 0.7304785894206549},
{'a': 0.5944452121044632,
'f1': 0.19522895530573073,
'p': 0.11069651741293532,
'r': 0.8259860788863109},
{'a': 0.311409056412851,
'f1': 0.35004775549188155,
'p': 0.21673565937315198,
'r': 0.9094292803970223},
{'a': 0.23751617076326004,
'f1': 0.21643180005317736,
'p': 0.12378345498783455,
'r': 0.8604651162790697},
{'a': 0.5991861648016277,
'f1': 0.13758599124452783,
'p': 0.07498295841854125,
'r': 0.8333333333333334},
{'a': 0.3306508875739645,
'f1': 0.3897280966767372,
'p': 0.24814509480626545,
'r': 0.907537688442211},
{'a': 0.603215251102575,
'f1': 0.1875910282551704,
'p': 0.10595590654820665,
'r': 0.817258883248731},
{'a': 0.5912418842381545,
'f1': 0.19832023841777296,
'p': 0.11244239631336406,
'r': 0.8394495412844036},
{'a': 0.6033519553072626,
'f1': 0.292358803986711,
'p': 0.1753487048107031,
'r': 0.8787446504992867},
{'a': 0.580749718151071,
'f1': 0.12164157071154416,
'p': 0.06592,
'r': 0.7862595419847328},
{'a': 0.5854936959909336,
'f1': 0.13534278959810875,
'p': 0.07360977177756349,
'r': 0.8388278388278388},
{'a': 0.23728315201411349,
'f1': 0.19089207735495947,
'p': 0.10778443113772455,
'r': 0.8337874659400545},
{'a': 0.5976490582070528,
'f1': 0.17484751670055182,
'p': 0.09798177083333333,
'r': 0.8113207547169812},
{'a': 0.601409666283084,
'f1': 0.15851806863042817,
'p': 0.08805668016194332,
'r': 0.7933130699088146},
{'a': 0.5995661605206074,
'f1': 0.1482620732082436,
'p': 0.08166723144696712,
'r': 0.8033333333333333},
{'a': 0.6116892373485389,
'f1': 0.18491921005385994,
'p': 0.10492359932088285,
'r': 0.7783375314861462},
{'a': 0.59974993053626,
'f1': 0.20786362386582347,
'p': 0.11954459203036052,
'r': 0.7957894736842105},
{'a': 0.5972012621758814,
'f1': 0.22818086225026288,
'p': 0.13183475091130012,
'r': 0.84765625},
{'a': 0.2567389875082183,
'f1': 0.19565990750622553,
'p': 0.11079774375503626,
'r': 0.8358662613981763},
{'a': 0.6091758708581139,
'f1': 0.22384701912260968,
'p': 0.1285529715762274,
'r': 0.8652173913043478},
{'a': 0.608569161597461,
'f1': 0.31417979610750696,
'p': 0.19093213179386087,
'r': 0.8862745098039215},
{'a': 0.27805978567399886,
'f1': 0.26857142857142857,
'p': 0.15921409214092141,
'r': 0.8576642335766423},
{'a': 0.2214304565848509,
'f1': 0.1873278236914601,
'p': 0.10559006211180125,
'r': 0.8292682926829268},
{'a': 0.23664980326025858,
'f1': 0.20631209818819407,
'p': 0.11719787516600266,
'r': 0.8609756097560975}],
'www.nymag.com;2000': [{'a': 0.9425414364640884,
'f1': 0.9440860215053763,
'p': 0.9251844046364595,
'r': 0.9637760702524698},
{'a': 0.9427288040426727,
'f1': 0.9430803571428572,
'p': 0.9224890829694323,
'r': 0.9646118721461188},
{'a': 0.9402366863905326,
'f1': 0.9463051568314725,
'p': 0.9368421052631579,
'r': 0.9559613319011815},
{'a': 0.9270248596631917,
'f1': 0.9103448275862068,
'p': 0.8733459357277883,
'r': 0.9506172839506173},
{'a': 0.9404255319148936,
'f1': 0.851851851851852,
'p': 0.8341968911917098,
'r': 0.8702702702702703},
{'a': 0.9549382716049383,
'f1': 0.9581181870338497,
'p': 0.9619815668202765,
'r': 0.9542857142857143},
{'a': 0.9580137262817925,
'f1': 0.9697146185206756,
'p': 0.9714119019836639,
'r': 0.9680232558139535},
{'a': 0.9394673123486683,
'f1': 0.9440089585666294,
'p': 0.9366666666666666,
'r': 0.9514672686230248},
{'a': 0.9344746162927982,
'f1': 0.9410515135422199,
'p': 0.9267782426778243,
'r': 0.9557713052858684},
{'a': 0.5105755041810133,
'f1': 0.4271732872769142,
'p': 0.28277439024390244,
'r': 0.8729411764705882},
{'a': 0.9386454183266932,
'f1': 0.9216683621566633,
'p': 0.9114688128772636,
'r': 0.9320987654320988},
{'a': 0.627173213135866,
'f1': 0.5021496130696474,
'p': 0.34803337306317045,
'r': 0.9012345679012346},
{'a': 0.9426644182124789,
'f1': 0.9506292352371732,
'p': 0.9370229007633588,
'r': 0.9646365422396856},
{'a': 0.9452054794520548,
'f1': 0.9559902200488998,
'p': 0.9630541871921182,
'r': 0.9490291262135923},
{'a': 0.9378813089295619,
'f1': 0.946360153256705,
'p': 0.9285714285714286,
'r': 0.96484375},
{'a': 0.9535490605427975,
'f1': 0.9616213885295387,
'p': 0.948936170212766,
'r': 0.9746503496503497},
{'a': 0.6271008403361344,
'f1': 0.5862470862470862,
'p': 0.4280851063829787,
'r': 0.9297597042513863},
{'a': 0.9449612403100776,
'f1': 0.9086229086229086,
'p': 0.9145077720207254,
'r': 0.9028132992327366},
{'a': 0.9414389291689905,
'f1': 0.9415041782729805,
'p': 0.9378468368479467,
'r': 0.9451901565995525},
{'a': 0.9498181818181818,
'f1': 0.9332042594385286,
'p': 0.9323017408123792,
'r': 0.9341085271317829},
{'a': 0.9301221166892809,
'f1': 0.9286209286209286,
'p': 0.8993288590604027,
'r': 0.9598853868194842},
{'a': 0.9354838709677419,
'f1': 0.932415519399249,
'p': 0.9085365853658537,
'r': 0.9575835475578406},
{'a': 0.9311145510835913,
'f1': 0.9187214611872146,
'p': 0.8902654867256637,
'r': 0.9490566037735849},
{'a': 0.9412225705329154,
'f1': 0.9006622516556291,
'p': 0.9139784946236559,
'r': 0.8877284595300261},
{'a': 0.9436519258202568,
'f1': 0.9399239543726237,
'p': 0.9507692307692308,
'r': 0.9293233082706767}],
'www.nymag.com;2005': [{'a': 0.7711069418386491,
'f1': 0.7621832358674464,
'p': 0.6729776247848537,
'r': 0.8786516853932584},
{'a': 0.7747963584091998,
'f1': 0.7693817468105986,
'p': 0.6782006920415224,
'r': 0.8888888888888888},
{'a': 0.7776712985146143,
'f1': 0.7600827300930713,
'p': 0.6693989071038251,
'r': 0.8791866028708134},
{'a': 0.7718120805369127,
'f1': 0.7603036876355748,
'p': 0.6594543744120414,
'r': 0.8975672215108835},
{'a': 0.772093023255814,
'f1': 0.7591480065537956,
'p': 0.6575212866603595,
'r': 0.8979328165374677},
{'a': 0.6690590111642744,
'f1': 0.5300113250283126,
'p': 0.3848684210526316,
'r': 0.850909090909091},
{'a': 0.6889952153110048,
'f1': 0.6470131885182312,
'p': 0.4970202622169249,
'r': 0.9266666666666666},
{'a': 0.7529296875,
'f1': 0.7548449612403101,
'p': 0.6629787234042553,
'r': 0.876265466816648},
{'a': 0.7890173410404624,
'f1': 0.7859237536656892,
'p': 0.7030430220356768,
'r': 0.8909574468085106},
{'a': 0.8201140487299119,
'f1': 0.8394261915779732,
'p': 0.7699490662139219,
'r': 0.9226856561546287},
{'a': 0.7540029112081513,
'f1': 0.7907552620718118,
'p': 0.6794326241134752,
'r': 0.945705824284304},
{'a': 0.7821091505949939,
'f1': 0.8270921523933572,
'p': 0.7345286292654714,
'r': 0.9463487332339792},
{'a': 0.7581395348837209,
'f1': 0.6584564860426929,
'p': 0.5976154992548435,
'r': 0.7330895795246801},
{'a': 0.6695778748180495,
'f1': 0.6828132277596646,
'p': 0.5461997019374069,
'r': 0.9105590062111801},
{'a': 0.697495183044316,
'f1': 0.7186379928315413,
'p': 0.5905743740795287,
'r': 0.9176201372997712},
{'a': 0.7282120395327942,
'f1': 0.7632093933463796,
'p': 0.6482712765957447,
'r': 0.9276879162702188},
{'a': 0.8176943699731903,
'f1': 0.8482142857142857,
'p': 0.7840440165061898,
'r': 0.9238249594813615},
{'a': 0.7697462900909526,
'f1': 0.7593796898449224,
'p': 0.6704946996466431,
'r': 0.8754325259515571},
{'a': 0.7224770642201835,
'f1': 0.7547628698824482,
'p': 0.6363636363636364,
'r': 0.9272908366533864},
{'a': 0.7748896517900932,
'f1': 0.7610619469026549,
'p': 0.6688014638609332,
'r': 0.8828502415458938},
{'a': 0.7994902293967715,
'f1': 0.814026792750197,
'p': 0.7405017921146954,
'r': 0.9037620297462817},
{'a': 0.7799607072691552,
'f1': 0.7812499999999999,
'p': 0.6884681583476764,
'r': 0.9029345372460497},
{'a': 0.6740623349181194,
'f1': 0.6716338477913784,
'p': 0.531592249368155,
'r': 0.911849710982659},
{'a': 0.7434108527131783,
'f1': 0.602641056422569,
'p': 0.47992351816443596,
'r': 0.8096774193548387},
{'a': 0.6736401673640168,
'f1': 0.6729559748427673,
'p': 0.5358931552587646,
'r': 0.9042253521126761},
{'a': 0.768056968463886,
'f1': 0.7894736842105263,
'p': 0.6979591836734694,
'r': 0.9086078639744952}],
'www.nymag.com;2010': [{'a': 0.48481943112815595,
'f1': 0.0627906976744186,
'p': 0.037241379310344824,
'r': 0.2},
{'a': 0.44631901840490795,
'f1': 0.33419402434526,
'p': 0.20798898071625344,
'r': 0.849906191369606},
{'a': 0.3217094017094017,
'f1': 0.036893203883495145,
'p': 0.01954732510288066,
'r': 0.3275862068965517},
{'a': 0.311042524005487,
'f1': 0.02899951667472209,
'p': 0.015511892450879007,
'r': 0.2222222222222222},
{'a': 0.4183240952070427,
'f1': 0.22904062229904926,
'p': 0.1347914547304171,
'r': 0.7614942528735632},
{'a': 0.3187355943365163,
'f1': 0.1510053344275749,
'p': 0.08329560887279312,
'r': 0.8070175438596491},
{'a': 0.41282778171509565,
'f1': 0.043854587420657815,
'p': 0.023944549464398234,
'r': 0.2602739726027397},
{'a': 0.43229657555765,
'f1': 0.2786427145708583,
'p': 0.16941747572815535,
'r': 0.7842696629213484},
{'a': 0.3127237227465018,
'f1': 0.14146341463414633,
'p': 0.0777479892761394,
'r': 0.7837837837837838},
{'a': 0.30776762402088775,
'f1': 0.1360488798370672,
'p': 0.07435440783615316,
'r': 0.7990430622009569},
{'a': 0.5344157329064715,
'f1': 0.5479573712255773,
'p': 0.3932441045251753,
'r': 0.9033674963396779},
{'a': 0.3160771704180064,
'f1': 0.15829046299960425,
'p': 0.08798944126704795,
'r': 0.7874015748031497},
{'a': 0.3274732850741124,
'f1': 0.03938946331856228,
'p': 0.021197668256491786,
'r': 0.2777777777777778},
{'a': 0.309208290859667,
'f1': 0.031443544545021435,
'p': 0.016516516516516516,
'r': 0.32673267326732675},
{'a': 0.3213815789473684,
'f1': 0.16104107360715736,
'p': 0.0894713059195662,
'r': 0.8048780487804879},
{'a': 0.32691658223573117,
'f1': 0.15658061785865424,
'p': 0.08714083843617522,
'r': 0.7708333333333334},
{'a': 0.4134419551934827,
'f1': 0.187206020696143,
'p': 0.1067024128686327,
'r': 0.7624521072796935},
{'a': 0.4195666447800394,
'f1': 0.23130434782608694,
'p': 0.13516260162601626,
'r': 0.8012048192771084},
{'a': 0.5790219702338767,
'f1': 0.5123152709359605,
'p': 0.3659824046920821,
'r': 0.853625170998632},
{'a': 0.48916909149692855,
'f1': 0.05952380952380952,
'p': 0.03333333333333333,
'r': 0.2777777777777778},
{'a': 0.3778471138845554,
'f1': 0.3018207282913165,
'p': 0.18340425531914895,
'r': 0.8517786561264822},
{'a': 0.3234536082474227,
'f1': 0.16930379746835442,
'p': 0.09460654288240496,
'r': 0.8045112781954887},
{'a': 0.4106593782029382,
'f1': 0.18207681365576103,
'p': 0.10339256865912763,
'r': 0.7619047619047619},
{'a': 0.40123034859876966,
'f1': 0.04782608695652174,
'p': 0.026112759643916916,
'r': 0.2838709677419355},
{'a': 0.5376782077393075,
'f1': 0.5942806076854334,
'p': 0.44127405441274054,
'r': 0.9097127222982216}],
'www.nymag.com;2015': [{'a': 0.12131556489201077,
'f1': 0.0027319011548491245,
'p': 0.0013710582076529975,
'r': 0.36666666666666664},
{'a': 0.12269175361243288,
'f1': 0.003965008797363269,
'p': 0.001994266483858906,
'r': 0.33613445378151263},
{'a': 0.12294602844710008,
'f1': 0.02455937590291823,
'p': 0.01246913278403951,
'r': 0.8082408874801902},
{'a': 0.16712910070181167,
'f1': 0.05854498493327594,
'p': 0.030300136859861868,
'r': 0.8631006346328196},
{'a': 0.25470154326426825,
'f1': 0.25441998690374257,
'p': 0.14750459948018574,
'r': 0.9245835621453414},
{'a': 0.17664334917498797,
'f1': 0.0838928168260947,
'p': 0.04405891163255117,
'r': 0.8748451053283767},
{'a': 0.12178293724674187,
'f1': 0.0027855845996965704,
'p': 0.0013984966161376521,
'r': 0.34146341463414637},
{'a': 0.12372770769899956,
'f1': 0.005528816487720596,
'p': 0.0027873870734924466,
'r': 0.33532934131736525},
{'a': 0.12206695969734742,
'f1': 0.003672912271994044,
'p': 0.0018477826608070316,
'r': 0.29959514170040485},
{'a': 0.12218016322779686,
'f1': 0.0030812812166090995,
'p': 0.0015481035731229244,
'r': 0.31958762886597936},
{'a': 0.12040199256052729,
'f1': 0.01787622656174099,
'p': 0.009036884239477433,
'r': 0.8177777777777778},
{'a': 0.15947986577181208,
'f1': 0.004504355314145663,
'p': 0.002267724938304542,
'r': 0.3285024154589372},
{'a': 0.12283227537464826,
'f1': 0.004061818902318208,
'p': 0.002044123145955378,
'r': 0.31417624521072796},
{'a': 0.131848751352114,
'f1': 0.02022239324858682,
'p': 0.010238357563217155,
'r': 0.8141025641025641},
{'a': 0.294921875,
'f1': 0.34880605811648296,
'p': 0.21353597200962168,
'r': 0.9516616314199395},
{'a': 0.12296532587559265,
'f1': 0.004612294492523619,
'p': 0.0023225033089428865,
'r': 0.3274647887323944},
{'a': 0.12218213621952553,
'f1': 0.003573999156139088,
'p': 0.001795914294978923,
'r': 0.36},
{'a': 0.627518315018315,
'f1': 0.7415409054805402,
'p': 0.6018566271273853,
'r': 0.965659908978072},
{'a': 0.12394710426395496,
'f1': 0.005351568306823249,
'p': 0.002696965913347484,
'r': 0.34069400630914826},
{'a': 0.12461045612046985,
'f1': 0.0060868489422244215,
'p': 0.003071928071928072,
'r': 0.328},
{'a': 0.16345886410413307,
'f1': 0.009158293526601878,
'p': 0.004635187408296652,
'r': 0.3787465940054496},
{'a': 0.1570371188687583,
'f1': 0.030863021527910078,
'p': 0.01572306696861857,
'r': 0.8321917808219178},
{'a': 0.12216138391396349,
'f1': 0.003220133263976617,
'p': 0.001618002140741294,
'r': 0.3282828282828283},
{'a': 0.12024096385542168,
'f1': 0.0014917580368464236,
'p': 0.0007477380922708806,
'r': 0.3},
{'a': 0.12138728323699421,
'f1': 0.0031796502384737677,
'p': 0.001597444089456869,
'r': 0.3333333333333333},
{'a': 0.1540937213883417,
'f1': 0.02350831118086026,
'p': 0.011924876527164023,
'r': 0.8210290827740492}]}
[About 4035 more lines. Double-click to unfold]
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_results):
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... pickle.dump(os.path.split(domain_results)[:-1],open(os.path.join(wcbe_test,'trimmed.pkl'),'wb'))
...
... return part
...
... for pack in packages:
... res = trim_results(pack)
[About 17 more lines. Double-click to unfold]
Traceback (most recent call last):
File "<pyshell#54>", line 19, in <module>
res = trim_results(pack)
File "<pyshell#54>", line 8, in trim_results
part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
File "<pyshell#54>", line 8, in <dictcomp>
part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
AttributeError: 'str' object has no attribute 'items'
[About 7 more lines. Double-click to unfold]
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_results):
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... pickle.dump(os.path.split(domain_results)[:-1],open(os.path.join(wcbe_test,'trimmed.pkl'),'wb'))
...
... return part
...
... for pack in packages:
... res = trim_results(open(pack))
Traceback (most recent call last):
File "<pyshell#55>", line 18, in <module>
res = trim_results(open(pack))
File "<pyshell#55>", line 7, in trim_results
part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
File "<pyshell#55>", line 7, in <dictcomp>
part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
AttributeError: 'str' object has no attribute 'items'
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = open(domain_path)
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... pickle.dump(os.path.split(domain_results)[:-1],open(os.path.join(wcbe_test,'trimmed.pkl'),'wb'))
...
... return part
...
... for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#56>", line 19, in <module>
res = trim_results(pack)
File "<pyshell#56>", line 8, in trim_results
part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
File "<pyshell#56>", line 8, in <dictcomp>
part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
AttributeError: 'str' object has no attribute 'items'
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... pickle.dump(os.path.split(domain_results)[:-1],open(os.path.join(wcbe_test,'trimmed.pkl'),'wb'))
...
... return part
...
... for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#57>", line 19, in <module>
res = trim_results(pack)
File "<pyshell#57>", line 14, in trim_results
pickle.dump(os.path.split(domain_results)[:-1],open(os.path.join(wcbe_test,'trimmed.pkl'),'wb'))
File "c:\python27\lib\ntpath.py", line 173, in split
while i and p[i-1] not in '/\\':
TypeError: 'in <string>' requires string as left operand, not dict
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... pickle.dump(os.path.split(domain_path)[:-1],open(os.path.join(domain_path,'trimmed.pkl'),'wb'))
...
... return part
...
... for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#58>", line 19, in <module>
res = trim_results(pack)
File "<pyshell#58>", line 14, in trim_results
pickle.dump(os.path.split(domain_path)[:-1],open(os.path.join(domain_path,'trimmed.pkl'),'wb'))
IOError: [Errno 2] No such file or directory: 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\results.pkl\\trimmed.pkl'
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... pickle.dump(domain_results,open(os.path.join(os.path.split(domain_path)[:-1],'trimmed.pkl'),'wb'))
...
... return part
...
... for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#59>", line 19, in <module>
res = trim_results(pack)
File "<pyshell#59>", line 14, in trim_results
pickle.dump(domain_results,open(os.path.join(os.path.split(domain_path)[:-1],'trimmed.pkl'),'wb'))
File "c:\python27\lib\ntpath.py", line 108, in join
path += "\\" + b
TypeError: can only concatenate tuple (not "str") to tuple
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
...
... trimmed_path = os.path.join(os.path.split(domain_path)[:-1],'trimmed.pkl')
... print(trimmed_path)
...
...
... return part
...
... for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#60>", line 22, in <module>
res = trim_results(pack)
File "<pyshell#60>", line 15, in trim_results
trimmed_path = os.path.join(os.path.split(domain_path)[:-1],'trimmed.pkl')
File "c:\python27\lib\ntpath.py", line 108, in join
path += "\\" + b
TypeError: can only concatenate tuple (not "str") to tuple
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... domain_path = os.path.join(os.path.split(domain_path)[:-1])
... trimmed_path = os.path.join(domain_path,'trimmed.pkl')
... print(trimmed_path)
...
...
... return part
...
... for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#61>", line 22, in <module>
res = trim_results(pack)
File "<pyshell#61>", line 15, in trim_results
trimmed_path = os.path.join(domain_path,'trimmed.pkl')
File "c:\python27\lib\ntpath.py", line 108, in join
path += "\\" + b
TypeError: can only concatenate tuple (not "str") to tuple
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... domain_path = os.path.join(os.path.split(domain_path)[:-1])
... trimmed_path = os.path.join(domain_path,'trimmed.pkl')
... print(trimmed_path)
...
...
... return part
>>> for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#63>", line 3, in <module>
res = trim_results(pack)
File "<pyshell#62>", line 15, in trim_results
trimmed_path = os.path.join(domain_path,'trimmed.pkl')
File "c:\python27\lib\ntpath.py", line 108, in join
path += "\\" + b
TypeError: can only concatenate tuple (not "str") to tuple
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... domain_path = os.path.split(domain_path)[:-1]
...
... trimmed_path = os.path.join(domain_path,'trimmed.pkl')
... print(trimmed_path)
...
...
... return part
>>> for pack in packages:
... res = trim_results(pack)
Traceback (most recent call last):
File "<pyshell#65>", line 2, in <module>
res = trim_results(pack)
File "<pyshell#64>", line 16, in trim_results
trimmed_path = os.path.join(domain_path,'trimmed.pkl')
File "c:\python27\lib\ntpath.py", line 108, in join
path += "\\" + b
TypeError: can only concatenate tuple (not "str") to tuple
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... domain_path = os.path.join(*(os.path.split(domain_path)[:-1]))
...
... trimmed_path = os.path.join(domain_path,'trimmed.pkl')
... print(trimmed_path)
...
...
... return part
>>> for pack in packages:
... res = trim_results(pack)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter\trimmed.pkl
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter\trimmed.pkl
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... domain_path = os.path.join(*(os.path.split(domain_path)[:-1]))
...
... trimmed_path = os.path.join(domain_path,'trimmed.pkl')
...
... pickle.dump(part,open(trimmed_path,'wb'))
...
... return part
>>> for pack in packages:
... res = trim_results(pack)
>>> res
16: {'entertainment.msn.com;2000': [{'a': 0.8906115417743324,
'f1': 0.7434343434343433,
'p': 0.6411149825783972,
'r': 0.8846153846153846},
{'a': 0.8646080760095012,
'f1': 0.7381316998468606,
'p': 0.6731843575418994,
'r': 0.8169491525423729},
{'a': 0.8780487804878049,
'f1': 0.8494711147274208,
'p': 0.786144578313253,
'r': 0.9238938053097345},
{'a': 0.8459657701711492,
'f1': 0.7060653188180405,
'p': 0.58656330749354,
'r': 0.88671875},
{'a': 0.870575221238938,
'f1': 0.7719298245614035,
'p': 0.673469387755102,
'r': 0.9041095890410958},
{'a': 0.8930817610062893,
'f1': 0.8034682080924855,
'p': 0.7202072538860104,
'r': 0.9084967320261438},
{'a': 0.8891228070175439,
'f1': 0.8704918032786886,
'p': 0.8219814241486069,
'r': 0.9250871080139372},
{'a': 0.8364406779661017,
'f1': 0.6666666666666666,
'p': 0.5376044568245125,
'r': 0.8772727272727273},
{'a': 0.8608695652173913,
'f1': 0.7743589743589743,
'p': 0.6817155756207675,
'r': 0.8961424332344213},
{'a': 0.8369659982563208,
'f1': 0.6175869120654397,
'p': 0.4886731391585761,
'r': 0.8388888888888889},
{'a': 0.8708771929824561,
'f1': 0.8491803278688524,
'p': 0.7719821162444114,
'r': 0.9435336976320583},
{'a': 0.8987012987012987,
'f1': 0.8276877761413844,
'p': 0.7473404255319149,
'r': 0.9273927392739274},
{'a': 0.9304979253112033,
'f1': 0.9330669330669331,
'p': 0.9192913385826772,
'r': 0.947261663286004},
{'a': 0.8870168483647175,
'f1': 0.4622641509433962,
'p': 0.3202614379084967,
'r': 0.8305084745762712},
{'a': 0.9012016021361816,
'f1': 0.8435517970401691,
'p': 0.7823529411764706,
'r': 0.9151376146788991},
{'a': 0.892325996430696,
'f1': 0.8507831821929102,
'p': 0.8087774294670846,
'r': 0.8973913043478261},
{'a': 0.8938181818181818,
'f1': 0.8089005235602095,
'p': 0.7322274881516587,
'r': 0.9035087719298246},
{'a': 0.8429319371727748,
'f1': 0.6875000000000001,
'p': 0.5739130434782609,
'r': 0.8571428571428571},
{'a': 0.8483455882352942,
'f1': 0.6405228758169934,
'p': 0.5176056338028169,
'r': 0.84},
{'a': 0.8430629264594389,
'f1': 0.7389659520807061,
'p': 0.6369565217391304,
'r': 0.8798798798798799},
{'a': 0.8876058506543495,
'f1': 0.8519269776876267,
'p': 0.7909604519774012,
'r': 0.9230769230769231},
{'a': 0.8882480173035328,
'f1': 0.8651000870322019,
'p': 0.8094462540716613,
'r': 0.9289719626168225},
{'a': 0.8997756170531039,
'f1': 0.8404761904761906,
'p': 0.7741228070175439,
'r': 0.9192708333333334},
{'a': 0.8988023952095808,
'f1': 0.8668242710795901,
'p': 0.8308157099697885,
'r': 0.9060955518945635},
{'a': 0.9415675297410777,
'f1': 0.9534689328503761,
'p': 0.9406267179769104,
'r': 0.9666666666666667}],
'entertainment.msn.com;2005': [{'a': 0.6592920353982301,
'f1': 0.6782729805013928,
'p': 0.5276273022751896,
'r': 0.949317738791423},
{'a': 0.47560975609756095,
'f1': 0.2268326417704011,
'p': 0.13078149920255183,
'r': 0.8541666666666666},
{'a': 0.3941256830601093,
'f1': 0.23070251517779705,
'p': 0.1339375629405841,
'r': 0.83125},
{'a': 0.6104417670682731,
'f1': 0.5481366459627329,
'p': 0.38748627881448955,
'r': 0.9363395225464191},
{'a': 0.6564065855404438,
'f1': 0.6821192052980133,
'p': 0.533678756476684,
'r': 0.944954128440367},
{'a': 0.6042356055592323,
'f1': 0.565406976744186,
'p': 0.4142705005324814,
'r': 0.8901601830663616},
{'a': 0.5637583892617449,
'f1': 0.5584905660377358,
'p': 0.3952991452991453,
'r': 0.9511568123393316},
{'a': 0.676510989010989,
'f1': 0.7097966728280961,
'p': 0.5663716814159292,
'r': 0.9504950495049505},
{'a': 0.6493416493416494,
'f1': 0.5845648604269293,
'p': 0.4336175395858709,
'r': 0.8967254408060453},
{'a': 0.7849044379656624,
'f1': 0.8557775847089487,
'p': 0.7600308641975309,
'r': 0.9791252485089463},
{'a': 0.7558179923584578,
'f1': 0.8346271465537521,
'p': 0.7282430213464697,
'r': 0.9774104683195592},
{'a': 0.5922459893048129,
'f1': 0.6099744245524297,
'p': 0.4462114125350795,
'r': 0.9636363636363636},
{'a': 0.5947666195190948,
'f1': 0.6136210384356036,
'p': 0.45183714001986097,
'r': 0.9558823529411765},
{'a': 0.7231740306582507,
'f1': 0.7797704447632713,
'p': 0.6579903147699758,
'r': 0.9568661971830986},
{'a': 0.5304878048780488,
'f1': 0.40655105973025046,
'p': 0.2650753768844221,
'r': 0.871900826446281},
{'a': 0.5057471264367817,
'f1': 0.3156498673740053,
'p': 0.19101123595505617,
'r': 0.9083969465648855},
{'a': 0.6488956587966489,
'f1': 0.6612784717119765,
'p': 0.5084745762711864,
'r': 0.9453781512605042},
{'a': 0.41633935585231735,
'f1': 0.25176233635448136,
'p': 0.1456876456876457,
'r': 0.9259259259259259},
{'a': 0.6038205980066446,
'f1': 0.5714285714285715,
'p': 0.4103225806451613,
'r': 0.9408284023668639},
{'a': 0.625,
'f1': 0.6229508196721312,
'p': 0.4666666666666667,
'r': 0.9366197183098591},
{'a': 0.5136157337367625,
'f1': 0.32101372756071805,
'p': 0.1958762886597938,
'r': 0.8888888888888888},
{'a': 0.5411415863602669,
'f1': 0.5137470542026709,
'p': 0.3538961038961039,
'r': 0.9369627507163324},
{'a': 0.6026402640264027,
'f1': 0.5333333333333333,
'p': 0.3739130434782609,
'r': 0.9297297297297298},
{'a': 0.6629345904537419,
'f1': 0.6908108108108107,
'p': 0.5392405063291139,
'r': 0.9609022556390977},
{'a': 0.5772113943028486,
'f1': 0.5765765765765767,
'p': 0.41423948220064727,
'r': 0.9481481481481482},
{'a': 0.385470719051149,
'f1': 0.22595704948646128,
'p': 0.12872340425531914,
'r': 0.9236641221374046},
{'a': 0.6125511596180082,
'f1': 0.6306892067620286,
'p': 0.47041707080504364,
'r': 0.9566074950690335}],
'entertainment.msn.com;2010': [{'a': 0.3064839572192513,
'f1': 0.1622930964876867,
'p': 0.08917480035492457,
'r': 0.9013452914798207},
{'a': 0.4191103789126853,
'f1': 0.4065971053517335,
'p': 0.2616984402079723,
'r': 0.9110105580693816},
{'a': 0.5764542936288088,
'f1': 0.5144490314385519,
'p': 0.3629032258064516,
'r': 0.8833151581243184},
{'a': 0.7329272202629048,
'f1': 0.6785025086839058,
'p': 0.5359756097560976,
'r': 0.9242902208201893},
{'a': 0.3133749730777515,
'f1': 0.31470335339638866,
'p': 0.18929402637703646,
'r': 0.932484076433121},
{'a': 0.47967145790554416,
'f1': 0.2925739810161921,
'p': 0.17420212765957446,
'r': 0.9128919860627178},
{'a': 0.134210275474216,
'f1': 0.10819833087874324,
'p': 0.057491652754590984,
'r': 0.9168053244592346},
{'a': 0.6119540229885058,
'f1': 0.5427952329360779,
'p': 0.3969889064976228,
'r': 0.8578767123287672},
{'a': 0.6968280467445743,
'f1': 0.5920934411500449,
'p': 0.44436952124072826,
'r': 0.8869448183041723},
{'a': 0.6994342291371994,
'f1': 0.6072088724584103,
'p': 0.45656706045865186,
'r': 0.9062068965517242},
{'a': 0.6474788041053101,
'f1': 0.6085232903865213,
'p': 0.46026986506746626,
'r': 0.8976608187134503},
{'a': 0.33048703352308667,
'f1': 0.24094657583363213,
'p': 0.13895781637717122,
'r': 0.9056603773584906},
{'a': 0.1698961937716263,
'f1': 0.18801827720426464,
'p': 0.1044172932330827,
'r': 0.9431239388794567},
{'a': 0.3765453903214412,
'f1': 0.31403031480761756,
'p': 0.1902967498822421,
'r': 0.8977777777777778},
{'a': 0.6084821428571429,
'f1': 0.55094726062468,
'p': 0.4008941877794337,
'r': 0.8805237315875614},
{'a': 0.40941248846508765,
'f1': 0.4124847001223991,
'p': 0.2631784459195627,
'r': 0.9533239038189534},
{'a': 0.7261870912709696,
'f1': 0.7834495165279964,
'p': 0.6669218989280244,
'r': 0.9493188010899183},
{'a': 0.13030746705710103,
'f1': 0.1346153846153846,
'p': 0.0724535403434486,
'r': 0.9476923076923077},
{'a': 0.5841584158415841,
'f1': 0.4295415959252971,
'p': 0.28331466965285557,
'r': 0.887719298245614},
{'a': 0.6314063140631406,
'f1': 0.6103164282618119,
'p': 0.4674634794156706,
'r': 0.8789013732833958},
{'a': 0.6747729270436567,
'f1': 0.7421003717472119,
'p': 0.6028690071725179,
'r': 0.9649546827794562},
{'a': 0.4391700723043068,
'f1': 0.45309625996321273,
'p': 0.2995541143088772,
'r': 0.929559748427673},
{'a': 0.6630686198920586,
'f1': 0.6746090841399851,
'p': 0.5338833235120801,
'r': 0.916076845298281},
{'a': 0.36177972283005105,
'f1': 0.3162281844230268,
'p': 0.19106074913440352,
'r': 0.9169184290030211},
{'a': 0.6180639935196436,
'f1': 0.5901781833985223,
'p': 0.4458305975049245,
'r': 0.8727506426735219}],
'entertainment.msn.com;2015': [{'a': 0.5683226852293316,
'f1': 0.2887666928515318,
'p': 0.17277683775145705,
'r': 0.878585086042065},
{'a': 0.5369417575632847,
'f1': 0.08200734394124846,
'p': 0.043086816720257236,
'r': 0.8481012658227848},
{'a': 0.5547539039335837,
'f1': 0.20251371924234376,
'p': 0.11458333333333333,
'r': 0.8706240487062404},
{'a': 0.535630062544858,
'f1': 0.08003250050782043,
'p': 0.04201322243548731,
'r': 0.8418803418803419},
{'a': 0.5361267245784364,
'f1': 0.0854321982671771,
'p': 0.045039303165498194,
'r': 0.828125},
{'a': 0.5328726755411035,
'f1': 0.08553809429082952,
'p': 0.04503560955173858,
'r': 0.849802371541502},
{'a': 0.4699380338301792,
'f1': 0.16710526315789473,
'p': 0.09197566628041715,
'r': 0.9123563218390804},
{'a': 0.5338477366255144,
'f1': 0.06287487073422958,
'p': 0.03275156216332687,
'r': 0.7835051546391752},
{'a': 0.5316589684372595,
'f1': 0.10942360475754805,
'p': 0.058352849336455896,
'r': 0.8768328445747801},
{'a': 0.5303261200853399,
'f1': 0.07410374524334068,
'p': 0.03875157100963553,
'r': 0.8447488584474886},
{'a': 0.4413022606266911,
'f1': 0.07912530571140843,
'p': 0.0413596029478117,
'r': 0.9105960264900662},
{'a': 0.539616516181394,
'f1': 0.11574434918160562,
'p': 0.06206896551724138,
'r': 0.8559077809798271},
{'a': 0.5308411214953271,
'f1': 0.15518157661647475,
'p': 0.08481797056545314,
'r': 0.9106029106029107},
{'a': 0.518874332848132,
'f1': 0.10923463887890765,
'p': 0.05841660261337433,
'r': 0.8397790055248618},
{'a': 0.48843044326735435,
'f1': 0.04708326120823957,
'p': 0.024207903168387328,
'r': 0.8553459119496856},
{'a': 0.5043800234805382,
'f1': 0.18235995232419547,
'p': 0.1017794777981041,
'r': 0.8755364806866953},
{'a': 0.5360209424083769,
'f1': 0.04771115409413282,
'p': 0.024562956406284576,
'r': 0.8283582089552238},
{'a': 0.47771703882145833,
'f1': 0.13049514113836186,
'p': 0.07018417122946739,
'r': 0.9276315789473685},
{'a': 0.495072958120144,
'f1': 0.08890408616857583,
'p': 0.04679625629949604,
'r': 0.8873720136518771},
{'a': 0.4821863051790059,
'f1': 0.16223815548994797,
'p': 0.08940192128912303,
'r': 0.8755690440060698},
{'a': 0.5582546108861899,
'f1': 0.32331863285556783,
'p': 0.1981753674607197,
'r': 0.8773373223635004},
{'a': 0.5435072353389185,
'f1': 0.16998442097974728,
'p': 0.09409735530854733,
'r': 0.8783542039355993},
{'a': 0.5293517100561511,
'f1': 0.0599510603588907,
'p': 0.03106508875739645,
'r': 0.8546511627906976},
{'a': 0.5290077900779008,
'f1': 0.05666187641141449,
'p': 0.029367950627793148,
'r': 0.8023255813953488},
{'a': 0.5468036529680366,
'f1': 0.18424657534246575,
'p': 0.102868068833652,
'r': 0.8819672131147541}],
'news.bbc.co.uk;2000': [{'a': 0.7616099071207431,
'f1': 0.681159420289855,
'p': 0.5474209650582362,
'r': 0.9013698630136986},
{'a': 0.7200282087447109,
'f1': 0.64773735581189,
'p': 0.5027548209366391,
'r': 0.9102244389027432},
{'a': 0.8307952622673435,
'f1': 0.8392282958199357,
'p': 0.772189349112426,
'r': 0.9190140845070423},
{'a': 0.8250336473755047,
'f1': 0.8163841807909603,
'p': 0.7297979797979798,
'r': 0.9262820512820513},
{'a': 0.8064911206368647,
'f1': 0.811904761904762,
'p': 0.7052740434332989,
'r': 0.9565217391304348},
{'a': 0.7923124612523249,
'f1': 0.750186428038777,
'p': 0.6490322580645161,
'r': 0.8886925795053003},
{'a': 0.7744186046511627,
'f1': 0.7045685279187817,
'p': 0.5792988313856428,
'r': 0.8989637305699482},
{'a': 0.8402777777777778,
'f1': 0.7803547066848567,
'p': 0.7096774193548387,
'r': 0.8666666666666667},
{'a': 0.7555555555555555,
'f1': 0.651685393258427,
'p': 0.5307820299500832,
'r': 0.843915343915344},
{'a': 0.8308243727598567,
'f1': 0.8135860979462874,
'p': 0.7223001402524544,
'r': 0.9312839059674503},
{'a': 0.7553269654665687,
'f1': 0.6819484240687679,
'p': 0.5467075038284839,
'r': 0.9060913705583756},
{'a': 0.7630180658873539,
'f1': 0.7565502183406113,
'p': 0.651928504233302,
'r': 0.9011703511053316},
{'a': 0.7930489731437599,
'f1': 0.6804878048780488,
'p': 0.5670731707317073,
'r': 0.850609756097561},
{'a': 0.7722122838401908,
'f1': 0.7506527415143603,
'p': 0.636766334440753,
'r': 0.9141494435612083},
{'a': 0.8095582910934106,
'f1': 0.7553488372093023,
'p': 0.6527331189710611,
'r': 0.8962472406181016},
{'a': 0.8171905067350866,
'f1': 0.7822765469824293,
'p': 0.7013698630136986,
'r': 0.8842832469775475},
{'a': 0.7904387688277669,
'f1': 0.7398373983739838,
'p': 0.6435643564356436,
'r': 0.869980879541109},
{'a': 0.7834394904458599,
'f1': 0.7817969661610268,
'p': 0.6836734693877551,
'r': 0.9128065395095368},
{'a': 0.7916213275299239,
'f1': 0.8054850177755205,
'p': 0.7036379769299024,
'r': 0.9418052256532067},
{'a': 0.7400976668475312,
'f1': 0.7386797599563557,
'p': 0.6154545454545455,
'r': 0.9236016371077762},
{'a': 0.774746192893401,
'f1': 0.753984753984754,
'p': 0.6430260047281324,
'r': 0.9112227805695142},
{'a': 0.7850799289520426,
'f1': 0.7760641579272055,
'p': 0.6756176154672395,
'r': 0.9115942028985508},
{'a': 0.8228829993535876,
'f1': 0.8123287671232877,
'p': 0.724053724053724,
'r': 0.9251170046801872},
{'a': 0.7725988700564972,
'f1': 0.7145390070921986,
'p': 0.5926470588235294,
'r': 0.8995535714285714},
{'a': 0.7506544502617801,
'f1': 0.7124528301886792,
'p': 0.5791411042944785,
'r': 0.9254901960784314}],
'news.bbc.co.uk;2005': [{'a': 0.780718336483932,
'f1': 0.6547619047619048,
'p': 0.5104408352668214,
'r': 0.9128630705394191},
{'a': 0.7576711250983478,
'f1': 0.6688172043010752,
'p': 0.5289115646258503,
'r': 0.9093567251461988},
{'a': 0.7484450587422253,
'f1': 0.7283582089552239,
'p': 0.636245110821382,
'r': 0.8516579406631762},
{'a': 0.8109146810146042,
'f1': 0.7743119266055046,
'p': 0.6730462519936204,
'r': 0.9114470842332614},
{'a': 0.7868354430379747,
'f1': 0.8092433167195288,
'p': 0.7144,
'r': 0.9331243469174504},
{'a': 0.8159235668789809,
'f1': 0.813428018076178,
'p': 0.726643598615917,
'r': 0.9237536656891495},
{'a': 0.8688995215311005,
'f1': 0.8910969793322735,
'p': 0.8453996983408748,
'r': 0.9420168067226891},
{'a': 0.7826393789696542,
'f1': 0.7353951890034365,
'p': 0.6257309941520468,
'r': 0.8916666666666667},
{'a': 0.7559572719802794,
'f1': 0.6485207100591717,
'p': 0.5074074074074074,
'r': 0.898360655737705},
{'a': 0.7923375902276513,
'f1': 0.7940528634361232,
'p': 0.7096456692913385,
'r': 0.90125},
{'a': 0.7991483321504613,
'f1': 0.7686017988552739,
'p': 0.6743185078909613,
'r': 0.8935361216730038},
{'a': 0.7759146341463414,
'f1': 0.6981519507186859,
'p': 0.5802047781569966,
'r': 0.8762886597938144},
{'a': 0.7913468248429868,
'f1': 0.7442258340461934,
'p': 0.673374613003096,
'r': 0.8317399617590823},
{'a': 0.7815646785437645,
'f1': 0.7157258064516129,
'p': 0.6027164685908319,
'r': 0.8808933002481389},
{'a': 0.7394023627519111,
'f1': 0.6411483253588516,
'p': 0.5114503816793893,
'r': 0.8589743589743589},
{'a': 0.7716599190283401,
'f1': 0.6795454545454547,
'p': 0.5456204379562044,
'r': 0.9006024096385542},
{'a': 0.7431340872374798,
'f1': 0.6353211009174311,
'p': 0.49113475177304966,
'r': 0.8993506493506493},
{'a': 0.8087625814091178,
'f1': 0.8096641131408367,
'p': 0.7231578947368421,
'r': 0.9196787148594378},
{'a': 0.6388384754990926,
'f1': 0.4944961896697714,
'p': 0.35393939393939394,
'r': 0.8202247191011236},
{'a': 0.8077144502014968,
'f1': 0.8217716115261472,
'p': 0.7291666666666666,
'r': 0.941320293398533},
{'a': 0.8449496831904585,
'f1': 0.8765578635014837,
'p': 0.832112676056338,
'r': 0.9260188087774295},
{'a': 0.7524590163934426,
'f1': 0.6026315789473683,
'p': 0.4626262626262626,
'r': 0.8641509433962264},
{'a': 0.7988565488565489,
'f1': 0.8056253139126067,
'p': 0.7277676950998185,
'r': 0.9021372328458943},
{'a': 0.6827810972297664,
'f1': 0.6363636363636365,
'p': 0.4871306005719733,
'r': 0.9174147217235189},
{'a': 0.7845791642142437,
'f1': 0.7662835249042146,
'p': 0.6578947368421053,
'r': 0.9174311926605505}],
'news.bbc.co.uk;2010': [{'a': 0.7047244094488189,
'f1': 0.6624156039009752,
'p': 0.5484472049689441,
'r': 0.8361742424242424},
{'a': 0.6772616136919315,
'f1': 0.5031367628607278,
'p': 0.3692449355432781,
'r': 0.7893700787401575},
{'a': 0.6189735614307932,
'f1': 0.3851944792973651,
'p': 0.2614991482112436,
'r': 0.7309523809523809},
{'a': 0.6716831683168317,
'f1': 0.5003013863773358,
'p': 0.3618134263295554,
'r': 0.810546875},
{'a': 0.6613508442776735,
'f1': 0.26024590163934425,
'p': 0.15796019900497513,
'r': 0.7383720930232558},
{'a': 0.6494109246697608,
'f1': 0.5305927342256215,
'p': 0.39389638041163944,
'r': 0.8125915080527086},
{'a': 0.6159473299195318,
'f1': 0.16534181240063592,
'p': 0.0936936936936937,
'r': 0.7027027027027027},
{'a': 0.6481947942905122,
'f1': 0.3997134670487107,
'p': 0.2685274302213667,
'r': 0.7815126050420168},
{'a': 0.6486486486486487,
'f1': 0.37241379310344824,
'p': 0.24446680080482897,
'r': 0.7813504823151125},
{'a': 0.7074209245742092,
'f1': 0.6858262573481384,
'p': 0.5740841990158556,
'r': 0.851581508515815},
{'a': 0.6693476318141197,
'f1': 0.3874172185430464,
'p': 0.2597114317425083,
'r': 0.762214983713355},
{'a': 0.6954528085594192,
'f1': 0.5825039287585123,
'p': 0.4524003254678601,
'r': 0.8176470588235294},
{'a': 0.6480167014613779,
'f1': 0.39656406585540444,
'p': 0.2702439024390244,
'r': 0.7446236559139785},
{'a': 0.6581896551724138,
'f1': 0.41215715344699777,
'p': 0.28024193548387094,
'r': 0.7787114845938375},
{'a': 0.6192504258943782,
'f1': 0.24620573355817876,
'p': 0.1454183266932271,
'r': 0.8021978021978022},
{'a': 0.6633199464524766,
'f1': 0.5880425880425879,
'p': 0.45674300254452926,
'r': 0.825287356321839},
{'a': 0.676737160120846,
'f1': 0.4758572428271519,
'p': 0.3346456692913386,
'r': 0.8232445520581114},
{'a': 0.6253462603878116,
'f1': 0.47678916827852996,
'p': 0.3440334961618981,
'r': 0.7763779527559055},
{'a': 0.6544540229885057,
'f1': 0.5251727541954591,
'p': 0.3908890521675239,
'r': 0.8},
{'a': 0.7061418391584663,
'f1': 0.6563492063492063,
'p': 0.533204384268214,
'r': 0.8534571723426213},
{'a': 0.7225452196382429,
'f1': 0.6978543791769258,
'p': 0.5842167255594818,
'r': 0.8663755458515284},
{'a': 0.6513490725126475,
'f1': 0.415547703180212,
'p': 0.2826923076923077,
'r': 0.784},
{'a': 0.7019822282980178,
'f1': 0.643499591169256,
'p': 0.5208471211118465,
'r': 0.841711229946524},
{'a': 0.6480090157776108,
'f1': 0.4837465564738291,
'p': 0.3478605388272583,
'r': 0.7938517179023508},
{'a': 0.6538942107455227,
'f1': 0.41022001419446413,
'p': 0.27895752895752896,
'r': 0.774798927613941}],
'news.bbc.co.uk;2015': [{'a': 0.53717710860568,
'f1': 0.23315204540080398,
'p': 0.13641394576646376,
'r': 0.8016260162601626},
{'a': 0.5360839359137956,
'f1': 0.20039100684261976,
'p': 0.11546043368065334,
'r': 0.7578558225508318},
{'a': 0.5623914661374348,
'f1': 0.37711864406779666,
'p': 0.24461749885478698,
'r': 0.8228043143297381},
{'a': 0.521271897541587,
'f1': 0.13648433351035583,
'p': 0.07525622254758418,
'r': 0.7321937321937322},
{'a': 0.5604934996020164,
'f1': 0.3612878349720455,
'p': 0.22943192948090108,
'r': 0.8495013599274706},
{'a': 0.5230859146697837,
'f1': 0.1786612984398591,
'p': 0.10186513629842181,
'r': 0.7259713701431493},
{'a': 0.5347887323943662,
'f1': 0.19927272727272727,
'p': 0.11480446927374302,
'r': 0.7541284403669725},
{'a': 0.5422015842665938,
'f1': 0.24572457245724574,
'p': 0.1465771812080537,
'r': 0.7593880389429764},
{'a': 0.5226997578692494,
'f1': 0.1100451467268623,
'p': 0.05939689308559245,
'r': 0.7471264367816092},
{'a': 0.5206686930091186,
'f1': 0.16472457627118645,
'p': 0.09217545939537641,
'r': 0.7736318407960199},
{'a': 0.46053439803439805,
'f1': 0.06094627105052124,
'p': 0.03185247275775356,
'r': 0.7037037037037037},
{'a': 0.49977514615499924,
'f1': 0.07741222007188277,
'p': 0.040983606557377046,
'r': 0.6965174129353234},
{'a': 0.5491543340380549,
'f1': 0.29211618257261407,
'p': 0.17890724269377384,
'r': 0.7954802259887006},
{'a': 0.5331188770288053,
'f1': 0.15640686922060765,
'p': 0.08718703976435935,
'r': 0.7589743589743589},
{'a': 0.5327915597376676,
'f1': 0.19265829021926584,
'p': 0.10970819304152638,
'r': 0.7898989898989899},
{'a': 0.5288140513862679,
'f1': 0.14488935721812435,
'p': 0.08071617258585266,
'r': 0.7069408740359897},
{'a': 0.5543024227234754,
'f1': 0.25782517968931135,
'p': 0.15548098434004473,
'r': 0.7544097693351425},
{'a': 0.5470580492299592,
'f1': 0.30917486448504317,
'p': 0.19026439337781073,
'r': 0.8244111349036403},
{'a': 0.5460943969086393,
'f1': 0.2940545181369393,
'p': 0.1785248892363826,
'r': 0.8333333333333334},
{'a': 0.47740040858018384,
'f1': 0.22407582938388623,
'p': 0.1293499671700591,
'r': 0.8371104815864022},
{'a': 0.5369387462343996,
'f1': 0.23032904148783978,
'p': 0.13412940849763955,
'r': 0.8145025295109612},
{'a': 0.5356658529970165,
'f1': 0.22287789378120745,
'p': 0.13124832932371025,
'r': 0.7383458646616541},
{'a': 0.5498366013071896,
'f1': 0.3169421487603306,
'p': 0.19481838963677928,
'r': 0.8493909191583611},
{'a': 0.5415730337078651,
'f1': 0.24198792382721784,
'p': 0.14273972602739726,
'r': 0.7942073170731707},
{'a': 0.49390962671905697,
'f1': 0.15151515151515152,
'p': 0.084228515625,
'r': 0.7532751091703057}],
'news.yahoo.com;2000': [{'a': 0.7793715846994536,
'f1': 0.8222344523940562,
'p': 0.7175792507204611,
'r': 0.9626288659793815},
{'a': 0.6867924528301886,
'f1': 0.7147766323024055,
'p': 0.5695509309967142,
'r': 0.959409594095941},
{'a': 0.76274328081557,
'f1': 0.7710196779964222,
'p': 0.6442451420029895,
'r': 0.9599109131403119},
{'a': 0.734504132231405,
'f1': 0.7128491620111731,
'p': 0.5646017699115045,
'r': 0.9666666666666667},
{'a': 0.7410546139359698,
'f1': 0.7417840375586855,
'p': 0.6058282208588958,
'r': 0.9564164648910412},
{'a': 0.7545195052331113,
'f1': 0.7566037735849057,
'p': 0.6275430359937402,
'r': 0.9524940617577197},
{'a': 0.7660633484162896,
'f1': 0.8342417441487656,
'p': 0.7308988764044944,
'r': 0.9716206123973115},
{'a': 0.64543429844098,
'f1': 0.7258953168044078,
'p': 0.578167855183763,
'r': 0.9750231267345051},
{'a': 0.6236141906873615,
'f1': 0.6828584773470342,
'p': 0.5251436781609196,
'r': 0.9759679572763685},
{'a': 0.6191256830601093,
'f1': 0.6571569109690113,
'p': 0.5045317220543807,
'r': 0.9421720733427362},
{'a': 0.7083333333333334,
'f1': 0.7067395264116576,
'p': 0.5631349782293179,
'r': 0.9486552567237164},
{'a': 0.8024922118380062,
'f1': 0.8491194669205141,
'p': 0.7514743049705139,
'r': 0.975929978118162},
{'a': 0.6774500475737393,
'f1': 0.6537282941777324,
'p': 0.4976671850699845,
'r': 0.9523809523809523},
{'a': 0.7497789566755084,
'f1': 0.7631799163179915,
'p': 0.6307053941908713,
'r': 0.9661016949152542},
{'a': 0.8020086083213773,
'f1': 0.8608870967741936,
'p': 0.7656903765690377,
'r': 0.9831158864159631},
{'a': 0.6323863636363637,
'f1': 0.6890917827967323,
'p': 0.532293986636971,
'r': 0.9768392370572208},
{'a': 0.6868076535750252,
'f1': 0.6493799323562571,
'p': 0.49740932642487046,
'r': 0.935064935064935},
{'a': 0.6993006993006993,
'f1': 0.6172106824925815,
'p': 0.45614035087719296,
'r': 0.9541284403669725},
{'a': 0.74784276126558,
'f1': 0.7468719923002887,
'p': 0.6081504702194357,
'r': 0.9675810473815462},
{'a': 0.5307364576993305,
'f1': 0.5525246662797446,
'p': 0.3892068683565004,
'r': 0.952},
{'a': 0.4749568221070812,
'f1': 0.33913043478260874,
'p': 0.20772303595206393,
'r': 0.9230769230769231},
{'a': 0.7070957095709571,
'f1': 0.7237354085603114,
'p': 0.58125,
'r': 0.9587628865979382},
{'a': 0.6900212314225053,
'f1': 0.6386138613861386,
'p': 0.47866419294990725,
'r': 0.9591078066914498},
{'a': 0.7902621722846442,
'f1': 0.8256537982565381,
'p': 0.7152103559870551,
'r': 0.9764359351988218},
{'a': 0.8653516295025729,
'f1': 0.8856518572469045,
'p': 0.8150134048257373,
'r': 0.9696969696969697}],
'news.yahoo.com;2005': [{'a': 0.7070275403608737,
'f1': 0.7163218390804597,
'p': 0.5740604274134119,
'r': 0.9523227383863081},
{'a': 0.6801275239107333,
'f1': 0.6508120649651972,
'p': 0.5031390134529148,
'r': 0.9211822660098522},
{'a': 0.7556390977443609,
'f1': 0.7746967071057193,
'p': 0.6573529411764706,
'r': 0.9430379746835443},
{'a': 0.7078103207810321,
'f1': 0.6366001734605377,
'p': 0.48673740053050396,
'r': 0.9197994987468672},
{'a': 0.6743814844373504,
'f1': 0.49127182044887774,
'p': 0.3367521367521368,
'r': 0.9078341013824884},
{'a': 0.7512332628611699,
'f1': 0.7104183757178015,
'p': 0.568988173455979,
'r': 0.9454148471615721},
{'a': 0.6431273644388399,
'f1': 0.546474358974359,
'p': 0.3866213151927438,
'r': 0.9316939890710383},
{'a': 0.7197802197802198,
'f1': 0.6700215672178289,
'p': 0.528344671201814,
'r': 0.9155206286836935},
{'a': 0.8564971751412429,
'f1': 0.8095952023988006,
'p': 0.7209612817089452,
'r': 0.9230769230769231},
{'a': 0.7428814279643009,
'f1': 0.7760088855979267,
'p': 0.6624525916561315,
'r': 0.936550491510277},
{'a': 0.8256555634301913,
'f1': 0.00806451612903226,
'p': 1.0,
'r': 0.004048582995951417},
{'a': 0.6725641025641026,
'f1': 0.697465055674011,
'p': 0.5531754979331078,
'r': 0.9435897435897436},
{'a': 0.8565415643633812,
'f1': 0.8611205432937182,
'p': 0.7875776397515528,
'r': 0.949812734082397},
{'a': 0.8178925035360679,
'f1': 0.8610736444564336,
'p': 0.780440097799511,
'r': 0.9602888086642599},
{'a': 0.815807560137457,
'f1': 0.8646464646464647,
'p': 0.7781818181818182,
'r': 0.9727272727272728},
{'a': 0.7583130575831306,
'f1': 0.7849927849927849,
'p': 0.6770379589296827,
'r': 0.9339055793991416},
{'a': 0.6638584667228307,
'f1': 0.3926940639269406,
'p': 0.258,
'r': 0.821656050955414},
{'a': 0.6920391061452514,
'f1': 0.6073018699910953,
'p': 0.45588235294117646,
'r': 0.9093333333333333},
{'a': 0.7992191857222533,
'f1': 0.8163265306122449,
'p': 0.7092198581560284,
'r': 0.9615384615384616},
{'a': 0.8023696682464455,
'f1': 0.830142566191446,
'p': 0.728898426323319,
'r': 0.9640491958372753},
{'a': 0.6121553884711779,
'f1': 0.47763713080168774,
'p': 0.3252873563218391,
'r': 0.8984126984126984},
{'a': 0.7097715119105493,
'f1': 0.7028372324539571,
'p': 0.5643485211830536,
'r': 0.9313984168865436},
{'a': 0.7966339410939691,
'f1': 0.8203221809169765,
'p': 0.7211328976034859,
'r': 0.9511494252873564},
{'a': 0.6212624584717608,
'f1': 0.4761029411764706,
'p': 0.32254047322540474,
'r': 0.9087719298245615},
{'a': 0.7738258093935249,
'f1': 0.7967213114754098,
'p': 0.6821052631578948,
'r': 0.9576354679802955}],
'news.yahoo.com;2010': [{'a': 0.6850789096126255,
'f1': 0.6442463533225282,
'p': 0.5073388640714741,
'r': 0.8823529411764706},
{'a': 0.6445198836081474,
'f1': 0.37081545064377686,
'p': 0.2440677966101695,
'r': 0.7714285714285715},
{'a': 0.666268418956591,
'f1': 0.5509110396570204,
'p': 0.41318327974276525,
'r': 0.8263665594855305},
{'a': 0.6745737583395107,
'f1': 0.616593886462882,
'p': 0.48092643051771117,
'r': 0.8588807785888077},
{'a': 0.7148050231328487,
'f1': 0.7014873746108613,
'p': 0.5844380403458214,
'r': 0.8771626297577855},
{'a': 0.6509856630824373,
'f1': 0.4775318578135479,
'p': 0.3352165725047081,
'r': 0.8298368298368298},
{'a': 0.7006802721088435,
'f1': 0.6570959803117309,
'p': 0.526281208935611,
'r': 0.8744541484716157},
{'a': 0.6556343577620173,
'f1': 0.5356004250797025,
'p': 0.39069767441860465,
'r': 0.8513513513513513},
{'a': 0.7001763668430335,
'f1': 0.6785173978819969,
'p': 0.5496323529411765,
'r': 0.8863636363636364},
{'a': 0.6288172043010752,
'f1': 0.42809807819748175,
'p': 0.2915162454873646,
'r': 0.8054862842892768},
{'a': 0.6278962001853569,
'f1': 0.3059636992221262,
'p': 0.19323144104803494,
'r': 0.7344398340248963},
{'a': 0.6378299120234604,
'f1': 0.3471365638766519,
'p': 0.22234762979683972,
'r': 0.7911646586345381},
{'a': 0.7478138222849083,
'f1': 0.7704160246533127,
'p': 0.6781193490054249,
'r': 0.89179548156956},
{'a': 0.6349047141424273,
'f1': 0.27920792079207923,
'p': 0.1700844390832328,
'r': 0.7790055248618785},
{'a': 0.7163333333333334,
'f1': 0.6839955440029707,
'p': 0.562958435207824,
'r': 0.8713339640491958},
{'a': 0.6885841064945543,
'f1': 0.5974973931178311,
'p': 0.45621019108280253,
'r': 0.8655589123867069},
{'a': 0.6682750301568154,
'f1': 0.559059326563335,
'p': 0.4217741935483871,
'r': 0.8288431061806656},
{'a': 0.6471316549731737,
'f1': 0.5094664371772805,
'p': 0.37,
'r': 0.8176795580110497},
{'a': 0.6820695102685624,
'f1': 0.5715806279936136,
'p': 0.4387254901960784,
'r': 0.8198473282442749},
{'a': 0.6826347305389222,
'f1': 0.5895715023231801,
'p': 0.4499605988967691,
'r': 0.8547904191616766},
{'a': 0.6681564245810055,
'f1': 0.5791213982050071,
'p': 0.438483547925608,
'r': 0.8525730180806675},
{'a': 0.7021857923497268,
'f1': 0.6743838685586259,
'p': 0.5598264104153751,
'r': 0.847887323943662},
{'a': 0.6970802919708029,
'f1': 0.6452991452991453,
'p': 0.5210489993098689,
'r': 0.8473625140291807},
{'a': 0.7001041305102395,
'f1': 0.665893271461717,
'p': 0.5361145703611457,
'r': 0.8785714285714286},
{'a': 0.6342222222222222,
'f1': 0.380737396538751,
'p': 0.2517412935323383,
'r': 0.7808641975308642}],
'news.yahoo.com;2015': [{'a': 0.3026707853803887,
'f1': 0.03546716073656747,
'p': 0.018202988550359015,
'r': 0.6876832844574781},
{'a': 0.34267970894746663,
'f1': 0.02506325110689437,
'p': 0.012768871344558124,
'r': 0.674468085106383},
{'a': 0.2536711184252168,
'f1': 0.018806299278121035,
'p': 0.009534054715349407,
'r': 0.6848484848484848},
{'a': 0.3412615568147927,
'f1': 0.04791838787312763,
'p': 0.02479553903345725,
'r': 0.7103301384451545},
{'a': 0.3789141768911367,
'f1': 0.026045236463331047,
'p': 0.01326286398085361,
'r': 0.7189189189189189},
{'a': 0.32849275211721307,
'f1': 0.06591262241173708,
'p': 0.03447493762757995,
'r': 0.7481542247744053},
{'a': 0.3465504358655044,
'f1': 0.05271519352975158,
'p': 0.027376711044440276,
'r': 0.7080504364694471},
{'a': 0.34739285807444503,
'f1': 0.05620732581387453,
'p': 0.029155089421985678,
'r': 0.7792887029288703},
{'a': 0.25936621208914157,
'f1': 0.034225104981960845,
'p': 0.017505646982897707,
'r': 0.762071992976295},
{'a': 0.38322787798658986,
'f1': 0.02846233946546338,
'p': 0.014514008293719025,
'r': 0.7302798982188295},
{'a': 0.28831313479540693,
'f1': 0.008643936315614494,
'p': 0.0043505744989402444,
'r': 0.6573033707865169},
{'a': 0.3385938686989977,
'f1': 0.03648436384406683,
'p': 0.01875366999412801,
'r': 0.668848167539267},
{'a': 0.36024452471771035,
'f1': 0.04818099819603126,
'p': 0.024929024228989228,
'r': 0.7162011173184357},
{'a': 0.3311775376606988,
'f1': 0.04346518273424575,
'p': 0.02245270558986909,
'r': 0.6776084407971864},
{'a': 0.24746535746448595,
'f1': 0.029339578454332552,
'p': 0.01497905228320548,
'r': 0.7105263157894737},
{'a': 0.3340227507755946,
'f1': 0.02296211251435132,
'p': 0.011681268251981644,
'r': 0.6698564593301436},
{'a': 0.3391250155802069,
'f1': 0.03396130160696716,
'p': 0.017427075542258787,
'r': 0.662873399715505},
{'a': 0.2548459563543004,
'f1': 0.013342965197807337,
'p': 0.006744855436697169,
'r': 0.61328125},
{'a': 0.32861473705309047,
'f1': 0.03388975015322493,
'p': 0.017406118065328494,
'r': 0.6394557823129252},
{'a': 0.3466113276307132,
'f1': 0.04221694613444131,
'p': 0.021743284038366006,
'r': 0.7229987293519695},
{'a': 0.32904955855194923,
'f1': 0.018576080521018355,
'p': 0.009431480855221133,
'r': 0.610705596107056},
{'a': 0.2667446051926543,
'f1': 0.024917409798544683,
'p': 0.0126782536090176,
'r': 0.7194513715710723},
{'a': 0.34213977443429744,
'f1': 0.06997910065394729,
'p': 0.03672256421142008,
'r': 0.7414285714285714},
{'a': 0.34551737106859104,
'f1': 0.06400645443237973,
'p': 0.033478688985792655,
'r': 0.7261632341723875},
{'a': 0.2554262838556327,
'f1': 0.010926464478604072,
'p': 0.005513973625175588,
'r': 0.5936794582392777}],
'thenation.com;2000': [{'a': 0.9252912954078135,
'f1': 0.9477719214183038,
'p': 0.9242990654205607,
'r': 0.9724680432645034},
{'a': 0.8879573170731707,
'f1': 0.912759643916914,
'p': 0.8718820861678005,
'r': 0.9576587795765878},
{'a': 0.8117647058823529,
'f1': 0.8422535211267607,
'p': 0.7465667915106118,
'r': 0.9660743134087237},
{'a': 0.9150970733120835,
'f1': 0.9501106759747998,
'p': 0.9177631578947368,
'r': 0.984821743734557},
{'a': 0.7437765634486946,
'f1': 0.8024344569288389,
'p': 0.6845047923322684,
'r': 0.9694570135746606},
{'a': 0.8315858453473133,
'f1': 0.8753032508491024,
'p': 0.7926186291739895,
'r': 0.9772481040086674},
{'a': 0.9272271016311167,
'f1': 0.9582373271889402,
'p': 0.9295892707460185,
'r': 0.9887072808320951},
{'a': 0.9277078085642317,
'f1': 0.9583997680823307,
'p': 0.9302194710185706,
'r': 0.9883408071748879},
{'a': 0.8844836025452766,
'f1': 0.9242132305716121,
'p': 0.8721212121212121,
'r': 0.9829234972677595},
{'a': 0.8874700718276137,
'f1': 0.9109286165508528,
'p': 0.8583333333333333,
'r': 0.9703903095558546},
{'a': 0.8984098939929329,
'f1': 0.93542953396968,
'p': 0.8918629550321199,
'r': 0.9834710743801653},
{'a': 0.7912234042553191,
'f1': 0.839632277834525,
'p': 0.7358997314234557,
'r': 0.9774078478002378},
{'a': 0.7303921568627451,
'f1': 0.6507936507936508,
'p': 0.4963680387409201,
'r': 0.9447004608294931},
{'a': 0.7250900360144058,
'f1': 0.6482334869431643,
'p': 0.4929906542056075,
'r': 0.9461883408071748},
{'a': 0.9218950064020487,
'f1': 0.9451438848920863,
'p': 0.9115351257588898,
'r': 0.9813258636788048},
{'a': 0.8310991957104558,
'f1': 0.87374749498998,
'p': 0.7884267631103075,
'r': 0.9797752808988764},
{'a': 0.9191542288557214,
'f1': 0.9434782608695652,
'p': 0.9132996632996633,
'r': 0.9757194244604317},
{'a': 0.9555752998667258,
'f1': 0.972617743702081,
'p': 0.9646931015752308,
'r': 0.9806736609607951},
{'a': 0.8501650165016502,
'f1': 0.8901790033865505,
'p': 0.8177777777777778,
'r': 0.9766454352441614},
{'a': 0.8850758180367119,
'f1': 0.9090909090909091,
'p': 0.8540925266903915,
'r': 0.97165991902834},
{'a': 0.9479119892231702,
'f1': 0.9704834605597965,
'p': 0.9535,
'r': 0.9880829015544041},
{'a': 0.9589581689029203,
'f1': 0.9779598756710935,
'p': 0.9640668523676881,
'r': 0.9922591743119266},
{'a': 0.9226679555340744,
'f1': 0.9511599511599511,
'p': 0.9186320754716981,
'r': 0.9860759493670886},
{'a': 0.847394540942928,
'f1': 0.8902765388046386,
'p': 0.8213991769547325,
'r': 0.9717624148003895},
{'a': 0.8856269113149847,
'f1': 0.9203238176395397,
'p': 0.8723747980613893,
'r': 0.9738503155996393}],
'thenation.com;2005': [{'a': 0.7705050505050505,
'f1': 0.8516971279373368,
'p': 0.761437908496732,
'r': 0.9662322274881516},
{'a': 0.5908304498269896,
'f1': 0.6554989075018208,
'p': 0.5090497737556561,
'r': 0.9202453987730062},
{'a': 0.7903430749682337,
'f1': 0.8648648648648648,
'p': 0.7810650887573964,
'r': 0.9688073394495413},
{'a': 0.8518687329079307,
'f1': 0.9112749112749112,
'p': 0.8489318413021363,
'r': 0.9835002946375958},
{'a': 0.6226101413133832,
'f1': 0.695710455764075,
'p': 0.5604751619870411,
'r': 0.9169611307420494},
{'a': 0.8763693270735524,
'f1': 0.9007537688442211,
'p': 0.8515439429928741,
'r': 0.956},
{'a': 0.7466918714555766,
'f1': 0.8224381625441697,
'p': 0.7156033820138356,
'r': 0.9667705088265836},
{'a': 0.8217105263157894,
'f1': 0.8539083557951482,
'p': 0.7991927346115035,
'r': 0.9166666666666666},
{'a': 0.763915547024952,
'f1': 0.8353413654618473,
'p': 0.7434471803018269,
'r': 0.9531568228105907},
{'a': 0.8094768015794669,
'f1': 0.8471892319873319,
'p': 0.7793153678077204,
'r': 0.9280138768430182},
{'a': 0.8035866780529461,
'f1': 0.8675115207373272,
'p': 0.789308176100629,
'r': 0.9629156010230179},
{'a': 0.6837944664031621,
'f1': 0.78125,
'p': 0.6596306068601583,
'r': 0.9578544061302682},
{'a': 0.8183246073298429,
'f1': 0.8623562078540262,
'p': 0.7837058399423216,
'r': 0.9585537918871252},
{'a': 0.7444839857651245,
'f1': 0.8254739912493922,
'p': 0.7244027303754266,
'r': 0.9593220338983051},
{'a': 0.8217948717948718,
'f1': 0.8878127522195318,
'p': 0.8190618019359642,
'r': 0.9691629955947136},
{'a': 0.9265043638033992,
'f1': 0.9582681272822117,
'p': 0.9348600508905852,
'r': 0.9828785446762975},
{'a': 0.8263118994826312,
'f1': 0.8509828788839569,
'p': 0.7748267898383372,
'r': 0.9437412095639943},
{'a': 0.861094761624485,
'f1': 0.8971229293809939,
'p': 0.8359057676685622,
'r': 0.9680150517403575},
{'a': 0.7757078986587184,
'f1': 0.8064308681672027,
'p': 0.7021276595744681,
'r': 0.947129909365559},
{'a': 0.8259162303664922,
'f1': 0.852549889135255,
'p': 0.7767676767676768,
'r': 0.9447174447174447},
{'a': 0.7938084112149533,
'f1': 0.833096926713948,
'p': 0.7447168216398986,
'r': 0.9452789699570815},
{'a': 0.8478513356562137,
'f1': 0.8789279112754158,
'p': 0.8291194420226679,
'r': 0.9351032448377581},
{'a': 0.7905956112852665,
'f1': 0.8166849615806807,
'p': 0.7286973555337904,
'r': 0.9288389513108615},
{'a': 0.8188097768331563,
'f1': 0.8554472233997457,
'p': 0.7888975762314308,
'r': 0.9342592592592592},
{'a': 0.7749627421758569,
'f1': 0.8476286579212916,
'p': 0.7567567567567568,
'r': 0.963302752293578}],
'thenation.com;2010': [{'a': 0.6572834107679694,
'f1': 0.6148648648648648,
'p': 0.47418285172903835,
'r': 0.874235807860262},
{'a': 0.6598915989159891,
'f1': 0.6163252827881381,
'p': 0.4761454888993859,
'r': 0.8734835355285961},
{'a': 0.6137999503598908,
'f1': 0.576252723311547,
'p': 0.4269572235673931,
'r': 0.8860971524288107},
{'a': 0.6807417974322396,
'f1': 0.6261276311393251,
'p': 0.48827514330380406,
'r': 0.87243947858473},
{'a': 0.7039106145251397,
'f1': 0.6980838943552564,
'p': 0.5702199661590525,
'r': 0.8998664886515354},
{'a': 0.7282674772036474,
'f1': 0.36953455571227084,
'p': 0.24125230202578268,
'r': 0.7891566265060241},
{'a': 0.6360457724094087,
'f1': 0.4816659121774559,
'p': 0.3427835051546392,
'r': 0.8097412480974124},
{'a': 0.4441632928475034,
'f1': 0.4119221845439943,
'p': 0.2687470889613414,
'r': 0.881588999236058},
{'a': 0.8558411214953271,
'f1': 0.8823192828533283,
'p': 0.8296269727403156,
'r': 0.9421588594704684},
{'a': 0.7728010386238234,
'f1': 0.7374343585896475,
'p': 0.642483660130719,
'r': 0.8653169014084507},
{'a': 0.6851628468033776,
'f1': 0.6186997808619431,
'p': 0.4788015828151498,
'r': 0.8740970072239422},
{'a': 0.679078882900539,
'f1': 0.6713497240341193,
'p': 0.5395161290322581,
'r': 0.8884462151394422},
{'a': 0.6242077284808832,
'f1': 0.631958350020024,
'p': 0.48598706498306127,
'r': 0.9032627361190613},
{'a': 0.6002055498458376,
'f1': 0.5546651402404121,
'p': 0.40526976160602257,
'r': 0.8785131459655485},
{'a': 0.6874809625342675,
'f1': 0.6191536748329622,
'p': 0.4868651488616462,
'r': 0.8501529051987767},
{'a': 0.5560009431737798,
'f1': 0.4917678812415654,
'p': 0.3423524990605036,
'r': 0.8726053639846744},
{'a': 0.7311007957559682,
'f1': 0.6752102523027633,
'p': 0.5495436766623207,
'r': 0.8753894080996885},
{'a': 0.7438772499262319,
'f1': 0.7192755498059509,
'p': 0.6017316017316018,
'r': 0.8938906752411575},
{'a': 0.36651776217825993,
'f1': 0.40570744362402716,
'p': 0.2596755652062843,
'r': 0.9270405836753306},
{'a': 0.5446330777656079,
'f1': 0.4146427314325942,
'p': 0.2773069679849341,
'r': 0.8214783821478382},
{'a': 0.49875259875259875,
'f1': 0.44382929642445207,
'p': 0.2980173482032218,
'r': 0.8690153568202349},
{'a': 0.7675413576725614,
'f1': 0.7616262064931267,
'p': 0.6542713567839196,
'r': 0.9111266620013996},
{'a': 0.5550983081847279,
'f1': 0.5285852713178294,
'p': 0.3743994509265614,
'r': 0.8986820428336079},
{'a': 0.7893442622950819,
'f1': 0.7763272410791993,
'p': 0.6781550937658388,
'r': 0.9077340569877883},
{'a': 0.6883074935400517,
'f1': 0.601074824307565,
'p': 0.4672236503856041,
'r': 0.8424101969872537}],
'thenation.com;2015': [{'a': 0.6909267345948569,
'f1': 0.7416869424168694,
'p': 0.6197899017282277,
'r': 0.9232710752145381},
{'a': 0.6075484301937207,
'f1': 0.5587683064213294,
'p': 0.4165733482642777,
'r': 0.8483466362599772},
{'a': 0.6410725167580743,
'f1': 0.6434624697336562,
'p': 0.5,
'r': 0.902376910016978},
{'a': 0.7708618331053352,
'f1': 0.8366252133625944,
'p': 0.7509301816590064,
'r': 0.9443985686760253},
{'a': 0.659557867360208,
'f1': 0.6926508570086874,
'p': 0.5582891748675246,
'r': 0.9121830550401979},
{'a': 0.635077793493635,
'f1': 0.6535982814178303,
'p': 0.5194195475885617,
'r': 0.8812454742939899},
{'a': 0.628898426323319,
'f1': 0.6451436388508892,
'p': 0.5075333620318554,
'r': 0.8851351351351351},
{'a': 0.5171009771986971,
'f1': 0.3128621089223638,
'p': 0.19607843137254902,
'r': 0.7736389684813754},
{'a': 0.5952216066481995,
'f1': 0.5488228483211115,
'p': 0.4016949152542373,
'r': 0.8660170523751523},
{'a': 0.636637541428141,
'f1': 0.6475745178258329,
'p': 0.506398537477148,
'r': 0.8978930307941653},
{'a': 0.5004492362982929,
'f1': 0.1588502269288956,
'p': 0.08974358974358974,
'r': 0.6907894736842105},
{'a': 0.623384809328711,
'f1': 0.5993965806235333,
'p': 0.45658835546475995,
'r': 0.8721951219512195},
{'a': 0.6374963778614894,
'f1': 0.6504610226320201,
'p': 0.5130013221683561,
'r': 0.8885496183206106},
{'a': 0.6366366366366366,
'f1': 0.645577035735208,
'p': 0.5027372262773723,
'r': 0.9018003273322422},
{'a': 0.6389476867251285,
'f1': 0.6502636203866432,
'p': 0.5080091533180778,
'r': 0.903173311635476},
{'a': 0.5163532297628781,
'f1': 0.30124040165386884,
'p': 0.18916913946587538,
'r': 0.7391304347826086},
{'a': 0.7247766397908041,
'f1': 0.7833247555326813,
'p': 0.6730542452830188,
'r': 0.9368075502667214},
{'a': 0.7764031382015691,
'f1': 0.8454640250260687,
'p': 0.7601725107819238,
'r': 0.9523138360347663},
{'a': 0.726676235390609,
'f1': 0.7880426140880904,
'p': 0.6830209481808158,
'r': 0.9312288613303269},
{'a': 0.7433193964733685,
'f1': 0.8090859924283397,
'p': 0.7083333333333334,
'r': 0.9432534678436317},
{'a': 0.6347986474023978,
'f1': 0.6305970149253731,
'p': 0.4893822393822394,
'r': 0.8863636363636364},
{'a': 0.6514334374112972,
'f1': 0.6836682122617208,
'p': 0.546315356113627,
'r': 0.9132828630419821},
{'a': 0.7524957619137314,
'f1': 0.8158116063919258,
'p': 0.7211895910780669,
'r': 0.9390125847047435},
{'a': 0.5140388768898488,
'f1': 0.2424242424242424,
'p': 0.14551333872271624,
'r': 0.7258064516129032},
{'a': 0.596252129471891,
'f1': 0.5533358462118357,
'p': 0.4066481994459834,
'r': 0.8655660377358491}],
'www.cnn.com;2000': [{'a': 0.7250922509225092,
'f1': 0.6471981057616416,
'p': 0.5444887118193891,
'r': 0.7976653696498055},
{'a': 0.74216106474171,
'f1': 0.7755743176909483,
'p': 0.7391467065868264,
'r': 0.8157786038826931},
{'a': 0.945568538045027,
'f1': 0.9680548586720188,
'p': 0.9605044805841354,
'r': 0.9757248819959542},
{'a': 0.7609953703703703,
'f1': 0.6262443438914028,
'p': 0.5210843373493976,
'r': 0.7845804988662132},
{'a': 0.7159609120521173,
'f1': 0.586337760910816,
'p': 0.4510948905109489,
'r': 0.8373983739837398},
{'a': 0.7988696808510638,
'f1': 0.8393947438279797,
'p': 0.7818991097922848,
'r': 0.9060171919770774},
{'a': 0.7905633124699085,
'f1': 0.7949080622347949,
'p': 0.7446996466431095,
'r': 0.8523761375126391},
{'a': 0.7510024057738572,
'f1': 0.7685426761088333,
'p': 0.6827814569536423,
'r': 0.8789428815004262},
{'a': 0.7171084337349397,
'f1': 0.7200762994754412,
'p': 0.6128246753246753,
'r': 0.8728323699421965},
{'a': 0.7616794795978711,
'f1': 0.7428206764518187,
'p': 0.6395604395604395,
'r': 0.8858447488584474},
{'a': 0.7936305732484077,
'f1': 0.771830985915493,
'p': 0.727433628318584,
'r': 0.822},
{'a': 0.7689030883919062,
'f1': 0.769883351007423,
'p': 0.6728452270620945,
'r': 0.8996282527881041},
{'a': 0.7722332015810277,
'f1': 0.7194157029823494,
'p': 0.6466083150984683,
'r': 0.8106995884773662},
{'a': 0.7437114380635975,
'f1': 0.7398843930635839,
'p': 0.6569717707442259,
'r': 0.8467475192943771},
{'a': 0.814926498303807,
'f1': 0.8506236689990873,
'p': 0.7920679886685552,
'r': 0.9185282522996058},
{'a': 0.7576601671309192,
'f1': 0.646053702196908,
'p': 0.5401360544217687,
'r': 0.8036437246963563},
{'a': 0.7560837577815507,
'f1': 0.7405177603853099,
'p': 0.643979057591623,
'r': 0.8711048158640227},
{'a': 0.7437908496732026,
'f1': 0.4716981132075472,
'p': 0.3391472868217054,
'r': 0.7743362831858407},
{'a': 0.7461220268872802,
'f1': 0.730958904109589,
'p': 0.629245283018868,
'r': 0.8718954248366013},
{'a': 0.7638036809815951,
'f1': 0.7179487179487181,
'p': 0.6140350877192983,
'r': 0.8641975308641975},
{'a': 0.7288246881155613,
'f1': 0.5970731707317073,
'p': 0.4629349470499244,
'r': 0.8406593406593407},
{'a': 0.789838337182448,
'f1': 0.8325666973321066,
'p': 0.7363710333604556,
'r': 0.9576719576719577},
{'a': 0.7944383860414395,
'f1': 0.8492602958816473,
'p': 0.7537260468417317,
'r': 0.9725274725274725},
{'a': 0.7624914442162902,
'f1': 0.8056022408963587,
'p': 0.7646224742998936,
'r': 0.851223362273086},
{'a': 0.7192349332370985,
'f1': 0.7101341281669152,
'p': 0.6302910052910053,
'r': 0.8131399317406144}],
'www.cnn.com;2005': [{'a': 0.6566113624937154,
'f1': 0.54314381270903,
'p': 0.39303000968054214,
'r': 0.8787878787878788},
{'a': 0.7129000969932104,
'f1': 0.5945205479452055,
'p': 0.45020746887966806,
'r': 0.875},
{'a': 0.7512173528109783,
'f1': 0.7386046511627906,
'p': 0.618380062305296,
'r': 0.9168591224018475},
{'a': 0.7270258035310095,
'f1': 0.658356940509915,
'p': 0.5296262534184139,
'r': 0.8697604790419161},
{'a': 0.7139796464916979,
'f1': 0.530755711775044,
'p': 0.38767650834403083,
'r': 0.841225626740947},
{'a': 0.7610976594027441,
'f1': 0.7306642402183805,
'p': 0.6106463878326996,
'r': 0.9093997734994338},
{'a': 0.7163677130044843,
'f1': 0.575503355704698,
'p': 0.42875,
'r': 0.875},
{'a': 0.7221333333333333,
'f1': 0.5473501303214596,
'p': 0.41015625,
'r': 0.8224543080939948},
{'a': 0.7564364527993461,
'f1': 0.7600644122383252,
'p': 0.6505858028945555,
'r': 0.9138431752178122},
{'a': 0.738359201773836,
'f1': 0.46846846846846846,
'p': 0.33226837060702874,
'r': 0.7938931297709924},
{'a': 0.7715536105032823,
'f1': 0.7408142999006951,
'p': 0.6190871369294606,
'r': 0.9221260815822002},
{'a': 0.7274579724910851,
'f1': 0.5810493343774471,
'p': 0.4380165289256198,
'r': 0.8627906976744186},
{'a': 0.7284240150093808,
'f1': 0.6342387871130765,
'p': 0.4990059642147117,
'r': 0.8700173310225303},
{'a': 0.8263620386643233,
'f1': 0.805358550039401,
'p': 0.7263681592039801,
'r': 0.9036251105216623},
{'a': 0.7647544366487825,
'f1': 0.7450805008944543,
'p': 0.639293937068304,
'r': 0.8928188638799571},
{'a': 0.7192268565615463,
'f1': 0.5855855855855856,
'p': 0.44368600682593856,
'r': 0.8609271523178808},
{'a': 0.8219017926734217,
'f1': 0.7643115007735946,
'p': 0.6657681940700808,
'r': 0.8970944309927361},
{'a': 0.7243107769423559,
'f1': 0.6703296703296703,
'p': 0.5415657788539144,
'r': 0.8794233289646134},
{'a': 0.7138643067846607,
'f1': 0.5578851412944393,
'p': 0.4135135135135135,
'r': 0.8571428571428571},
{'a': 0.7094594594594594,
'f1': 0.4702258726899384,
'p': 0.3262108262108262,
'r': 0.8419117647058824},
{'a': 0.7523928422804828,
'f1': 0.7205260685767966,
'p': 0.6001564945226917,
'r': 0.9012925969447708},
{'a': 0.7447199265381084,
'f1': 0.7208835341365463,
'p': 0.5948632974316487,
'r': 0.9146496815286624},
{'a': 0.7483537158984007,
'f1': 0.6739792809262645,
'p': 0.5368932038834952,
'r': 0.9050736497545008},
{'a': 0.7225853304284677,
'f1': 0.5639269406392694,
'p': 0.42512908777969016,
'r': 0.8372881355932204},
{'a': 0.7688588007736944,
'f1': 0.799328295549958,
'p': 0.702755905511811,
'r': 0.9266709928617781}],
'www.cnn.com;2010': [{'a': 0.5958498023715415,
'f1': 0.6487260234755224,
'p': 0.4982409850483729,
'r': 0.9294503691550451},
{'a': 0.4556716995741386,
'f1': 0.44470774091627174,
'p': 0.29292403746097817,
'r': 0.9229508196721311},
{'a': 0.5746956842493545,
'f1': 0.6205988812109245,
'p': 0.4631630648330059,
'r': 0.9401794616151545},
{'a': 0.5945371775417299,
'f1': 0.669470559129144,
'p': 0.5140577507598785,
'r': 0.9595744680851064},
{'a': 0.3890641430073607,
'f1': 0.19080779944289694,
'p': 0.10804416403785488,
'r': 0.8154761904761905},
{'a': 0.4409411764705882,
'f1': 0.35783783783783785,
'p': 0.22274562584118437,
'r': 0.9093406593406593},
{'a': 0.3878193909695485,
'f1': 0.3656147986942329,
'p': 0.22784810126582278,
'r': 0.9247706422018349},
{'a': 0.5213572854291417,
'f1': 0.5307240704500978,
'p': 0.3731425426527243,
'r': 0.9186991869918699},
{'a': 0.5541215653621981,
'f1': 0.5672727272727273,
'p': 0.4100467289719626,
'r': 0.9200524246395806},
{'a': 0.6208609271523179,
'f1': 0.6846598733131367,
'p': 0.5392624728850325,
'r': 0.9374057315233786},
{'a': 0.40233497979344407,
'f1': 0.2893753336892685,
'p': 0.17294192724952137,
'r': 0.8856209150326797},
{'a': 0.4649961250322914,
'f1': 0.5155555555555555,
'p': 0.3587239583333333,
'r': 0.9160432252701579},
{'a': 0.5091452034341172,
'f1': 0.5278276481149012,
'p': 0.37009063444108764,
'r': 0.9198998748435544},
{'a': 0.5600979192166463,
'f1': 0.6446509788412101,
'p': 0.4871488344291692,
'r': 0.9526592635885447},
{'a': 0.20246099200811873,
'f1': 0.10581709571895889,
'p': 0.056526363774502354,
'r': 0.8266666666666667},
{'a': 0.41228070175438597,
'f1': 0.3684661525278492,
'p': 0.23056300268096513,
'r': 0.9168443496801706},
{'a': 0.5926611162503854,
'f1': 0.6666666666666666,
'p': 0.5134084726000777,
'r': 0.9503597122302159},
{'a': 0.45343303874915025,
'f1': 0.4727868852459016,
'p': 0.31846289752650175,
'r': 0.9173027989821882},
{'a': 0.6825902335456475,
'f1': 0.7601283594063377,
'p': 0.6348408710217756,
'r': 0.9470264867566217},
{'a': 0.45691423881821913,
'f1': 0.38226371061843645,
'p': 0.24690985830569792,
'r': 0.8460743801652892},
{'a': 0.10922867967688898,
'f1': 0.06861499364675985,
'p': 0.03575601776115915,
'r': 0.8468634686346863},
{'a': 0.597119775201967,
'f1': 0.6514737161956853,
'p': 0.4965261695229273,
'r': 0.9469964664310954},
{'a': 0.4669211195928753,
'f1': 0.44207723035952057,
'p': 0.2910578609000584,
'r': 0.9188191881918819},
{'a': 0.5534696614146832,
'f1': 0.5992779783393503,
'p': 0.44286349488661625,
'r': 0.9265116279069767},
{'a': 0.454513767863367,
'f1': 0.46786807208432507,
'p': 0.3142987665600731,
'r': 0.9148936170212766}],
'www.cnn.com;2015': [{'a': 0.5239771227452705,
'f1': 0.6165839829907866,
'p': 0.458377239199157,
'r': 0.9415584415584416},
{'a': 0.280306905370844,
'f1': 0.351313969571231,
'p': 0.21758994860079955,
'r': 0.9114832535885168},
{'a': 0.3640474844544941,
'f1': 0.3323442136498516,
'p': 0.20573108008817045,
'r': 0.8641975308641975},
{'a': 0.2231301068510371,
'f1': 0.29612756264236906,
'p': 0.17675050985723997,
'r': 0.9122807017543859},
{'a': 0.2742960944595822,
'f1': 0.34238683127572017,
'p': 0.21020717534108135,
'r': 0.9223946784922394},
{'a': 0.4528112449799197,
'f1': 0.502283105022831,
'p': 0.34965034965034963,
'r': 0.8914100486223663},
{'a': 0.40832395950506184,
'f1': 0.4380341880341881,
'p': 0.2883263009845288,
'r': 0.9111111111111111},
{'a': 0.5116175156389634,
'f1': 0.5836190476190476,
'p': 0.42579210672595885,
'r': 0.927360774818402},
{'a': 0.20112822738121067,
'f1': 0.2700237906423473,
'p': 0.1585195530726257,
'r': 0.910427807486631},
{'a': 0.09787153173698214,
'f1': 0.07663878622836023,
'p': 0.040286298568507156,
'r': 0.7848605577689243},
{'a': 0.2851123595505618,
'f1': 0.33347883020515057,
'p': 0.20416889363976484,
'r': 0.9095238095238095},
{'a': 0.25156669650850494,
'f1': 0.21575984990619138,
'p': 0.1232583065380493,
'r': 0.8646616541353384},
{'a': 0.520149001015916,
'f1': 0.5459788529317526,
'p': 0.3904674610449129,
'r': 0.9073482428115016},
{'a': 0.6155215379138483,
'f1': 0.7233606557377048,
'p': 0.5779778960294719,
'r': 0.9664613278576317},
{'a': 0.44800810263335583,
'f1': 0.5882649206748929,
'p': 0.42105263157894735,
'r': 0.9757727652464494},
{'a': 0.47843137254901963,
'f1': 0.5559265442404007,
'p': 0.3929203539823009,
'r': 0.9500713266761769},
{'a': 0.18812340524240315,
'f1': 0.22771403353927627,
'p': 0.1304677623261694,
'r': 0.8942807625649913},
{'a': 0.3710653753026634,
'f1': 0.35585864848109117,
'p': 0.22369446609508964,
'r': 0.8696969696969697},
{'a': 0.5073655361425146,
'f1': 0.5376205787781351,
'p': 0.3820840950639854,
'r': 0.9067245119305857},
{'a': 0.281437125748503,
'f1': 0.3695751484696208,
'p': 0.2318051575931232,
'r': 0.911036036036036},
{'a': 0.2067876874506709,
'f1': 0.2471910112359551,
'p': 0.14261019878997408,
'r': 0.9269662921348315},
{'a': 0.6125786163522012,
'f1': 0.704839482510781,
'p': 0.5569859901552442,
'r': 0.95955642530985},
{'a': 0.4265536723163842,
'f1': 0.4087378640776699,
'p': 0.26411543287327477,
'r': 0.9034334763948498},
{'a': 0.5626666666666666,
'f1': 0.6672463768115943,
'p': 0.511101243339254,
'r': 0.9607679465776294},
{'a': 0.44152311876699907,
'f1': 0.4007782101167315,
'p': 0.26292278238672623,
'r': 0.8425357873210634}],
'www.esquire.com;2000': [{'a': 0.9244147157190635,
'f1': 0.9000884173297966,
'p': 0.837171052631579,
'r': 0.9732313575525813},
{'a': 0.9411411411411411,
'f1': 0.9370988446726571,
'p': 0.9023485784919654,
'r': 0.9746328437917223},
{'a': 0.896568871208354,
'f1': 0.880597014925373,
'p': 0.8082191780821918,
'r': 0.9672131147540983},
{'a': 0.9186885245901639,
'f1': 0.8732106339468302,
'p': 0.799625468164794,
'r': 0.9617117117117117},
{'a': 0.9350835322195704,
'f1': 0.9354838709677419,
'p': 0.9021043000914913,
'r': 0.9714285714285714},
{'a': 0.920303605313093,
'f1': 0.8844036697247706,
'p': 0.8183361629881154,
'r': 0.9620758483033932},
{'a': 0.8321554770318021,
'f1': 0.7267497603068073,
'p': 0.5885093167701864,
'r': 0.949874686716792},
{'a': 0.9063400576368876,
'f1': 0.9108367626886145,
'p': 0.8556701030927835,
'r': 0.9736070381231672},
{'a': 0.8858981860737273,
'f1': 0.8305821025195482,
'p': 0.7353846153846154,
'r': 0.9540918163672655},
{'a': 0.911460895228726,
'f1': 0.918625678119349,
'p': 0.8721030042918455,
'r': 0.9703915950334289},
{'a': 0.9168490153172867,
'f1': 0.8822314049586776,
'p': 0.8227360308285164,
'r': 0.9510022271714922},
{'a': 0.8906789413118527,
'f1': 0.8571428571428571,
'p': 0.782967032967033,
'r': 0.946843853820598},
{'a': 0.9346307385229541,
'f1': 0.9320891653706583,
'p': 0.8909811694747275,
'r': 0.9771739130434782},
{'a': 0.8867387592487194,
'f1': 0.8401606425702812,
'p': 0.7450142450142451,
'r': 0.9631675874769797},
{'a': 0.888004246284501,
'f1': 0.8577208361429535,
'p': 0.7813267813267813,
'r': 0.9506726457399103},
{'a': 0.9171732522796353,
'f1': 0.8716136631330976,
'p': 0.8008658008658008,
'r': 0.9560723514211886},
{'a': 0.9357249626307922,
'f1': 0.9416553595658074,
'p': 0.9139596136962248,
'r': 0.9710820895522388},
{'a': 0.8933479934029687,
'f1': 0.8696236559139785,
'p': 0.8027295285359801,
'r': 0.9486803519061584},
{'a': 0.8737919272313814,
'f1': 0.806282722513089,
'p': 0.7,
'r': 0.9506172839506173},
{'a': 0.7936908517350157,
'f1': 0.5813060179257362,
'p': 0.42193308550185876,
'r': 0.934156378600823},
{'a': 0.9241231209735147,
'f1': 0.8954635108481264,
'p': 0.8407407407407408,
'r': 0.9578059071729957},
{'a': 0.9359047078842881,
'f1': 0.9350201265094882,
'p': 0.8983425414364641,
'r': 0.9748201438848921},
{'a': 0.9008086253369272,
'f1': 0.8756756756756757,
'p': 0.7950920245398773,
'r': 0.9744360902255639},
{'a': 0.1,
'f1': 0.09999999999999999,
'p': 0.05555555555555555,
'r': 0.5},
{'a': 0.8837342497136311,
'f1': 0.8329218106995885,
'p': 0.7397660818713451,
'r': 0.9529190207156308}],
'www.esquire.com;2005': [{'a': 0.9402859545836838,
'f1': 0.9348025711662075,
'p': 0.9000884173297966,
'r': 0.9723018147086915},
{'a': 0.9348381601362862,
'f1': 0.9273849074513526,
'p': 0.8889899909008189,
'r': 0.9692460317460317},
{'a': 0.9347929557353641,
'f1': 0.9088489687292083,
'p': 0.8580402010050251,
'r': 0.9660537482319661},
{'a': 0.9394353369763205,
'f1': 0.9270433351618212,
'p': 0.8820459290187892,
'r': 0.976878612716763},
{'a': 0.9410499139414802,
'f1': 0.9339759036144578,
'p': 0.8955637707948244,
'r': 0.9758308157099698},
{'a': 0.9008304836345872,
'f1': 0.8612440191387559,
'p': 0.7787391841779975,
'r': 0.963302752293578},
{'a': 0.887762490948588,
'f1': 0.9045566502463054,
'p': 0.8375142531356898,
'r': 0.9832663989290495},
{'a': 0.90703125,
'f1': 0.8936550491510276,
'p': 0.8424599831508003,
'r': 0.9514747859181731},
{'a': 0.9395770392749244,
'f1': 0.9319727891156463,
'p': 0.8954248366013072,
'r': 0.9716312056737588},
{'a': 0.9384404924760602,
'f1': 0.9256198347107437,
'p': 0.8795811518324608,
'r': 0.9767441860465116},
{'a': 0.921634014172572,
'f1': 0.9082031250000001,
'p': 0.8555657773689053,
'r': 0.967741935483871},
{'a': 0.8983488132094943,
'f1': 0.8425259792166265,
'p': 0.7485795454545454,
'r': 0.9634369287020109},
{'a': 0.8804463336875664,
'f1': 0.7974797479747975,
'p': 0.683641975308642,
'r': 0.9568034557235421},
{'a': 0.8800721370604148,
'f1': 0.873574144486692,
'p': 0.7956709956709956,
'r': 0.9683877766069547},
{'a': 0.9315307457479285,
'f1': 0.9212242849974912,
'p': 0.8801534036433365,
'r': 0.9663157894736842},
{'a': 0.8630533719486967,
'f1': 0.8398645379777456,
'p': 0.7457044673539519,
'r': 0.9612403100775194},
{'a': 0.9392605633802817,
'f1': 0.9299492385786803,
'p': 0.8884578079534433,
'r': 0.9755058572949947},
{'a': 0.9405111018014244,
'f1': 0.9353369763205829,
'p': 0.9000876424189308,
'r': 0.9734597156398104},
{'a': 0.9411513859275054,
'f1': 0.9347826086956522,
'p': 0.8950226244343892,
'r': 0.9782393669634025},
{'a': 0.9470782800441014,
'f1': 0.9496855345911951,
'p': 0.9219810040705563,
'r': 0.979106628242075},
{'a': 0.9362017804154302,
'f1': 0.9119453924914674,
'p': 0.8641655886157826,
'r': 0.9653179190751445},
{'a': 0.9264104660670482,
'f1': 0.9209138840070299,
'p': 0.8733333333333333,
'r': 0.9739776951672863},
{'a': 0.9337832138338253,
'f1': 0.9269427640763146,
'p': 0.8964896489648965,
'r': 0.9595375722543352},
{'a': 0.9363327674023769,
'f1': 0.929444967074318,
'p': 0.894927536231884,
'r': 0.9667318982387475},
{'a': 0.941267989109296,
'f1': 0.9409001956947163,
'p': 0.9092284417549168,
'r': 0.9748580697485807}],
'www.esquire.com;2010': [{'a': 0.5578565881439296,
'f1': 0.4486765655261459,
'p': 0.3058978873239437,
'r': 0.8414043583535109},
{'a': 0.6793708408953418,
'f1': 0.7275531185743661,
'p': 0.6007357102433503,
'r': 0.922241529105126},
{'a': 0.49786535303776686,
'f1': 0.23204419889502764,
'p': 0.13660555884092254,
'r': 0.77},
{'a': 0.5909209496477955,
'f1': 0.5632311977715877,
'p': 0.41639209225700163,
'r': 0.8700516351118761},
{'a': 0.6037735849056604,
'f1': 0.5948905109489052,
'p': 0.4478021978021978,
'r': 0.8858695652173914},
{'a': 0.589628681177977,
'f1': 0.46089150546677876,
'p': 0.32009345794392524,
'r': 0.8228228228228228},
{'a': 0.4929818555289284,
'f1': 0.320330426801285,
'p': 0.20011467889908258,
'r': 0.8022988505747126},
{'a': 0.5809628008752735,
'f1': 0.5606118546845125,
'p': 0.4018640350877193,
'r': 0.9266750948166877},
{'a': 0.5881726158763985,
'f1': 0.5575271894676588,
'p': 0.4065108514190317,
'r': 0.8870673952641166},
{'a': 0.614460668864814,
'f1': 0.6150952268986598,
'p': 0.47754654983570644,
'r': 0.8639365918097754},
{'a': 0.555004135649297,
'f1': 0.48036059240180295,
'p': 0.3375565610859729,
'r': 0.8325892857142857},
{'a': 0.5829145728643216,
'f1': 0.532540675844806,
'p': 0.38385205232295894,
'r': 0.8692543411644535},
{'a': 0.6373819459696903,
'f1': 0.6581072685856285,
'p': 0.5216677609980302,
'r': 0.8911946158160404},
{'a': 0.5265700483091788,
'f1': 0.3306010928961749,
'p': 0.20862068965517241,
'r': 0.7960526315789473},
{'a': 0.6213592233009708,
'f1': 0.618208516886931,
'p': 0.47732426303854875,
'r': 0.8770833333333333},
{'a': 0.5612718828152912,
'f1': 0.2372670807453416,
'p': 0.14075165806927045,
'r': 0.7549407114624506},
{'a': 0.44934086629001885,
'f1': 0.19670329670329673,
'p': 0.11257861635220126,
'r': 0.7782608695652173},
{'a': 0.6065668202764977,
'f1': 0.5334699453551913,
'p': 0.38875062220009954,
'r': 0.8498367791077258},
{'a': 0.623580313683072,
'f1': 0.5827338129496403,
'p': 0.44607618173474073,
'r': 0.8401037165082109},
{'a': 0.6771457085828343,
'f1': 0.6862269641125122,
'p': 0.5540328895849648,
'r': 0.9012738853503185},
{'a': 0.4807826694619147,
'f1': 0.28967495219885275,
'p': 0.1772966647162083,
'r': 0.7911227154046997},
{'a': 0.4745083758193736,
'f1': 0.2845810609816559,
'p': 0.17341389728096676,
'r': 0.7928176795580111},
{'a': 0.7214745465184318,
'f1': 0.7693798449612403,
'p': 0.662771285475793,
'r': 0.9168591224018475},
{'a': 0.6063515509601182,
'f1': 0.6140477914554671,
'p': 0.46661775495231106,
'r': 0.8976711362032463},
{'a': 0.6401311866428145,
'f1': 0.6583639966034532,
'p': 0.5139195757843571,
'r': 0.915748031496063}],
'www.esquire.com;2015': [{'a': 0.27375047691720716,
'f1': 0.014368932038834952,
'p': 0.007260596546310832,
'r': 0.6851851851851852},
{'a': 0.27848577474723724,
'f1': 0.03363355797694779,
'p': 0.01720804331013148,
'r': 0.739612188365651},
{'a': 0.27297310119075857,
'f1': 0.014659551211984825,
'p': 0.007403078121955971,
'r': 0.7402597402597403},
{'a': 0.452523611252158,
'f1': 0.07956291616868703,
'p': 0.04206535475717638,
'r': 0.7327044025157232},
{'a': 0.3059379217273954,
'f1': 0.13149451168027018,
'p': 0.07121951219512195,
'r': 0.8556776556776556},
{'a': 0.32285453041320156,
'f1': 0.1883936958563824,
'p': 0.10544456128052343,
'r': 0.8830724070450098},
{'a': 0.28436326549534097,
'f1': 0.061066845082415916,
'p': 0.03175406414067936,
'r': 0.7943037974683544},
{'a': 0.277951044324733,
'f1': 0.031931069437404966,
'p': 0.016321243523316063,
'r': 0.7325581395348837},
{'a': 0.3774253156760086,
'f1': 0.17540281460330412,
'p': 0.09759418974126191,
'r': 0.8651911468812877},
{'a': 0.4778957767349963,
'f1': 0.5349246507534925,
'p': 0.37357403495294794,
'r': 0.941620679639849},
{'a': 0.2830762038335671,
'f1': 0.0516974831488467,
'p': 0.026719509076962415,
'r': 0.793168880455408},
{'a': 0.3875558246915449,
'f1': 0.21301429822001752,
'p': 0.12138343864316595,
'r': 0.8690476190476191},
{'a': 0.28152973707643997,
'f1': 0.04533565823888404,
'p': 0.02336328626444159,
'r': 0.7615062761506276},
{'a': 0.30361500225937643,
'f1': 0.1158854913659572,
'p': 0.06227648292021211,
'r': 0.832646331409728},
{'a': 0.2944548915286887,
'f1': 0.08439973811082674,
'p': 0.04446256114386053,
'r': 0.8292397660818713},
{'a': 0.35679553607014747,
'f1': 0.09692221600447679,
'p': 0.05152921575627752,
'r': 0.8139097744360902},
{'a': 0.2837977296181631,
'f1': 0.05297109539759335,
'p': 0.02737705969096621,
'r': 0.8133333333333334},
{'a': 0.2790588013391805,
'f1': 0.024500733745932497,
'p': 0.012453784783031718,
'r': 0.75},
{'a': 0.35671390823925053,
'f1': 0.09261350801897447,
'p': 0.04909003831417624,
'r': 0.8167330677290837},
{'a': 0.31473175021987687,
'f1': 0.16431597576017587,
'p': 0.09064016092770086,
'r': 0.8779369627507163},
{'a': 0.27854289071680377,
'f1': 0.02341413755805815,
'p': 0.011900911972058729,
'r': 0.71875},
{'a': 0.4165834165834166,
'f1': 0.3061778682959946,
'p': 0.18494976419930284,
'r': 0.8886699507389163},
{'a': 0.3249466765333217,
'f1': 0.18421883219358234,
'p': 0.1030666902113132,
'r': 0.8664027709054923},
{'a': 0.32021662226492553,
'f1': 0.1722946024993353,
'p': 0.09575599952713086,
'r': 0.8585055643879174},
{'a': 0.34968225517353757,
'f1': 0.059391939665331135,
'p': 0.030814380044020543,
'r': 0.8181818181818182},
{'a': 0.2785321618192924,
'f1': 0.034580320653882425,
'p': 0.01769740652551644,
'r': 0.7513661202185792}],
'www.forbes.com;2000': [{'a': 0.6699155295646524,
'f1': 0.7190265486725664,
'p': 0.5671902268760908,
'r': 0.9818731117824774},
{'a': 0.6566011235955056,
'f1': 0.6930320150659134,
'p': 0.5359223300970873,
'r': 0.9804618117229129},
{'a': 0.8373308733087331,
'f1': 0.8982496633968069,
'p': 0.8303698435277382,
'r': 0.9782153330540427},
{'a': 0.6920462270564242,
'f1': 0.7386035776110791,
'p': 0.5947955390334573,
'r': 0.974124809741248},
{'a': 0.5675,
'f1': 0.44133476856835313,
'p': 0.29036827195467424,
'r': 0.9192825112107623},
{'a': 0.6636828644501279,
'f1': 0.7014755959137344,
'p': 0.5493333333333333,
'r': 0.9701726844583988},
{'a': 0.718421052631579,
'f1': 0.7833130822195222,
'p': 0.706355003652301,
'r': 0.8790909090909091},
{'a': 0.6119942196531792,
'f1': 0.6139468008626887,
'p': 0.45042194092827004,
'r': 0.963882618510158},
{'a': 0.7627977698935631,
'f1': 0.827178729689808,
'p': 0.7165706973768394,
'r': 0.9781659388646288},
{'a': 0.9245192307692308,
'f1': 0.95206106870229,
'p': 0.9159811985898942,
'r': 0.9910998092816274},
{'a': 0.6364874063989108,
'f1': 0.6598726114649681,
'p': 0.5048732943469786,
'r': 0.9522058823529411},
{'a': 0.8459829692706405,
'f1': 0.901468498342018,
'p': 0.8291938997821351,
'r': 0.9875454073689673},
{'a': 0.5948678071539658,
'f1': 0.595810705973623,
'p': 0.42953020134228187,
'r': 0.9721518987341772},
{'a': 0.6165254237288136,
'f1': 0.6252587991718426,
'p': 0.466529351184346,
'r': 0.9476987447698745},
{'a': 0.5602968460111317,
'f1': 0.4357142857142857,
'p': 0.2863849765258216,
'r': 0.9104477611940298},
{'a': 0.6955818965517241,
'f1': 0.7629039026437263,
'p': 0.6299376299376299,
'r': 0.9670212765957447},
{'a': 0.7408906882591093,
'f1': 0.8051750380517504,
'p': 0.6919555264879006,
'r': 0.9626933575978162},
{'a': 0.7126262626262626,
'f1': 0.7834031214312904,
'p': 0.6566687938736439,
'r': 0.970754716981132},
{'a': 0.6822916666666666,
'f1': 0.7409155261915998,
'p': 0.5987795575896262,
'r': 0.9715346534653465},
{'a': 0.47807332854061824,
'f1': 0.24844720496894407,
'p': 0.14457831325301204,
'r': 0.8823529411764706},
{'a': 0.8772563176895307,
'f1': 0.7875000000000001,
'p': 0.6847826086956522,
'r': 0.9264705882352942},
{'a': 0.6225490196078431,
'f1': 0.6350710900473934,
'p': 0.47325933400605447,
'r': 0.9650205761316872},
{'a': 0.7355666429080542,
'f1': 0.7792980368828079,
'p': 0.656312625250501,
'r': 0.95900439238653},
{'a': 0.6608927381745503,
'f1': 0.6917019987886129,
'p': 0.5511583011583011,
'r': 0.9284552845528455},
{'a': 0.8419117647058824,
'f1': 0.9007692307692309,
'p': 0.8334519572953737,
'r': 0.9799163179916318},
{'a': 0.6880370125578321,
'f1': 0.7345331833520811,
'p': 0.5936363636363636,
'r': 0.9631268436578171},
{'a': 0.6392536281962682,
'f1': 0.6477732793522268,
'p': 0.4892966360856269,
'r': 0.9580838323353293},
{'a': 0.878755364806867,
'f1': 0.916358253145818,
'p': 0.8645251396648045,
'r': 0.9748031496062992}],
'www.forbes.com;2005': [{'a': 0.7722736992159658,
'f1': 0.3419155509783728,
'p': 0.21727748691099477,
'r': 0.8019323671497585},
{'a': 0.8083989501312336,
'f1': 0.7436797752808989,
'p': 0.6299821534800714,
'r': 0.9074550128534704},
{'a': 0.7874430024552789,
'f1': 0.4757785467128028,
'p': 0.33454987834549876,
'r': 0.8233532934131736},
{'a': 0.7634902411021814,
'f1': 0.556989247311828,
'p': 0.4088397790055249,
'r': 0.8735244519392917},
{'a': 0.8061097256857855,
'f1': 0.6509539842873177,
'p': 0.5056669572798606,
'r': 0.9133858267716536},
{'a': 0.8057324840764332,
'f1': 0.6561443066516347,
'p': 0.5091863517060368,
'r': 0.9223454833597464},
{'a': 0.7756692646560488,
'f1': 0.42634315424610053,
'p': 0.28941176470588237,
'r': 0.8092105263157895},
{'a': 0.8001235712079086,
'f1': 0.6200822078684675,
'p': 0.47782805429864256,
'r': 0.882943143812709},
{'a': 0.8255127844900253,
'f1': 0.7563750490388387,
'p': 0.6313032089063523,
'r': 0.9432485322896281},
{'a': 0.697562776957164,
'f1': 0.13149522799575822,
'p': 0.07200929152148665,
'r': 0.7560975609756098},
{'a': 0.7232704402515723,
'f1': 0.36231884057971014,
'p': 0.2334267040149393,
'r': 0.8090614886731392},
{'a': 0.7841675541356052,
'f1': 0.42857142857142855,
'p': 0.289707750952986,
'r': 0.8231046931407943},
{'a': 0.7805970149253731,
'f1': 0.3130841121495327,
'p': 0.1976401179941003,
'r': 0.7528089887640449},
{'a': 0.7898820608317815,
'f1': 0.6461055933089388,
'p': 0.5065573770491804,
'r': 0.8917748917748918},
{'a': 0.7988912842623961,
'f1': 0.6456863809007053,
'p': 0.5116079105760963,
'r': 0.875},
{'a': 0.778064041221936,
'f1': 0.3751295336787565,
'p': 0.24659400544959129,
'r': 0.7835497835497836},
{'a': 0.8187765783447825,
'f1': 0.6767794632438741,
'p': 0.5380333951762524,
'r': 0.9119496855345912},
{'a': 0.8288753799392097,
'f1': 0.733806146572104,
'p': 0.6110236220472441,
'r': 0.9183431952662722},
{'a': 0.8055476931786018,
'f1': 0.6966887417218544,
'p': 0.5696750902527076,
'r': 0.8965909090909091},
{'a': 0.8151183970856102,
'f1': 0.694123556002009,
'p': 0.5608766233766234,
'r': 0.9104084321475626},
{'a': 0.753903345724907,
'f1': 0.6413867822318526,
'p': 0.5,
'r': 0.8942598187311178},
{'a': 0.7940993788819876,
'f1': 0.5864004990642545,
'p': 0.4467680608365019,
'r': 0.852994555353902},
{'a': 0.7974877450980392,
'f1': 0.6333887964503605,
'p': 0.4930915371329879,
'r': 0.8852713178294573},
{'a': 0.8281089414182939,
'f1': 0.7850947638933505,
'p': 0.6788888888888889,
'r': 0.9306930693069307},
{'a': 0.7907412362857907,
'f1': 0.6996927803379416,
'p': 0.5630407911001236,
'r': 0.9239350912778904}],
'www.forbes.com;2010': [{'a': 0.8137171888230313,
'f1': 0.5849056603773585,
'p': 0.4403409090909091,
'r': 0.8707865168539326},
{'a': 0.864447660395562,
'f1': 0.8663813599619591,
'p': 0.8148479427549195,
'r': 0.9248730964467005},
{'a': 0.8325673013788575,
'f1': 0.7671232876712328,
'p': 0.6687898089171974,
'r': 0.8993576017130621},
{'a': 0.8565763384005287,
'f1': 0.8067675868210151,
'p': 0.7248,
'r': 0.9096385542168675},
{'a': 0.8740079365079365,
'f1': 0.8798486281929989,
'p': 0.8325872873769025,
'r': 0.9327983951855566},
{'a': 0.8449367088607594,
'f1': 0.8019401778496361,
'p': 0.7294117647058823,
'r': 0.8904847396768402},
{'a': 0.7433414043583535,
'f1': 0.6535947712418301,
'p': 0.5188067444876784,
'r': 0.8830022075055187},
{'a': 0.8580931263858093,
'f1': 0.8433292533659731,
'p': 0.7613259668508288,
'r': 0.9451303155006858},
{'a': 0.835093896713615,
'f1': 0.8066070199587062,
'p': 0.7306733167082294,
'r': 0.9001536098310292},
{'a': 0.824655894673848,
'f1': 0.7497865072587533,
'p': 0.6353111432706223,
'r': 0.9145833333333333},
{'a': 0.8580968280467446,
'f1': 0.8513119533527698,
'p': 0.7692307692307693,
'r': 0.9530026109660574},
{'a': 0.8564593301435407,
'f1': 0.8554603854389722,
'p': 0.7833333333333333,
'r': 0.9422169811320755},
{'a': 0.8377334191886671,
'f1': 0.7906976744186046,
'p': 0.6928675400291121,
'r': 0.9206963249516441},
{'a': 0.8434430964760254,
'f1': 0.8222950819672131,
'p': 0.7376470588235294,
'r': 0.9288888888888889},
{'a': 0.8225419664268585,
'f1': 0.7743902439024389,
'p': 0.6773333333333333,
'r': 0.9039145907473309},
{'a': 0.8393186003683242,
'f1': 0.8324531925108017,
'p': 0.7611940298507462,
'r': 0.9184322033898306},
{'a': 0.839344262295082,
'f1': 0.7860262008733625,
'p': 0.6859756097560976,
'r': 0.9202453987730062},
{'a': 0.8418230563002681,
'f1': 0.7954939341421143,
'p': 0.7039877300613497,
'r': 0.9143426294820717},
{'a': 0.8094968908988129,
'f1': 0.7230895645028759,
'p': 0.625,
'r': 0.8576998050682261},
{'a': 0.8236009732360098,
'f1': 0.7668810289389068,
'p': 0.654320987654321,
'r': 0.9262135922330097},
{'a': 0.852112676056338,
'f1': 0.8229885057471265,
'p': 0.7376373626373627,
'r': 0.9306759098786829},
{'a': 0.5662721893491124,
'f1': 0.18826135105204872,
'p': 0.10745891276864729,
'r': 0.7589285714285714},
{'a': 0.6209535020600353,
'f1': 0.3611111111111111,
'p': 0.2266500622665006,
'r': 0.8878048780487805},
{'a': 0.8299637117677553,
'f1': 0.8163493840985443,
'p': 0.7282717282717283,
'r': 0.9286624203821656},
{'a': 0.8589580686149937,
'f1': 0.832579185520362,
'p': 0.745945945945946,
'r': 0.9419795221843004}],
'www.forbes.com;2015': [{'a': 0.5165745856353591,
'f1': 0.5577085088458299,
'p': 0.3956963538553497,
'r': 0.9443651925820257},
{'a': 0.4187856772184743,
'f1': 0.4004282655246253,
'p': 0.25424881033310676,
'r': 0.9420654911838791},
{'a': 0.43794242567248703,
'f1': 0.39016897081413215,
'p': 0.2514851485148515,
'r': 0.8698630136986302},
{'a': 0.44252288911495424,
'f1': 0.4350515463917526,
'p': 0.28455832771409306,
'r': 0.9234135667396062},
{'a': 0.4253184098803551,
'f1': 0.45196908354803095,
'p': 0.29878345498783454,
'r': 0.9274924471299094},
{'a': 0.48127421437795953,
'f1': 0.516258530710558,
'p': 0.3540748898678414,
'r': 0.9525925925925925},
{'a': 0.5458333333333333,
'f1': 0.6062138728323698,
'p': 0.44580233793836344,
'r': 0.9469525959367946},
{'a': 0.45315161839863716,
'f1': 0.4992199687987519,
'p': 0.3382663847780127,
'r': 0.9523809523809523},
{'a': 0.3860444041685546,
'f1': 0.38156093108169786,
'p': 0.23831242873432154,
'r': 0.9565217391304348},
{'a': 0.337013364323068,
'f1': 0.25278323510150624,
'p': 0.14744079449961803,
'r': 0.8853211009174312},
{'a': 0.37746806039488967,
'f1': 0.2776280323450135,
'p': 0.16375198728139906,
'r': 0.911504424778761},
{'a': 0.5217065868263473,
'f1': 0.5804333552199606,
'p': 0.425,
'r': 0.9151138716356108},
{'a': 0.5336898395721925,
'f1': 0.6088516746411483,
'p': 0.445124617402711,
'r': 0.9631031220435194},
{'a': 0.5185487158581329,
'f1': 0.5765507350304769,
'p': 0.4125192406362237,
'r': 0.9571428571428572},
{'a': 0.4438573315719947,
'f1': 0.46460364561254774,
'p': 0.3075196408529742,
'r': 0.949740034662045},
{'a': 0.44591937069813176,
'f1': 0.43621810905452724,
'p': 0.2847811887655127,
'r': 0.9316239316239316},
{'a': 0.4135643199410247,
'f1': 0.426253155427335,
'p': 0.2755244755244755,
'r': 0.9410828025477707},
{'a': 0.3984418475236505,
'f1': 0.3205531112507856,
'p': 0.1946564885496183,
'r': 0.9074733096085409},
{'a': 0.4356609574000878,
'f1': 0.4473118279569892,
'p': 0.29328821206993794,
'r': 0.9420289855072463},
{'a': 0.5367298578199052,
'f1': 0.5824136703453187,
'p': 0.42405391394504927,
'r': 0.9295454545454546},
{'a': 0.34825381210034434,
'f1': 0.24501424501424504,
'p': 0.1430472388556221,
'r': 0.8531746031746031},
{'a': 0.48039678790741613,
'f1': 0.49817518248175185,
'p': 0.3387096774193548,
'r': 0.9413793103448276},
{'a': 0.24763257575757575,
'f1': 0.08625646923519263,
'p': 0.04550970873786408,
'r': 0.8241758241758241},
{'a': 0.5140405616224649,
'f1': 0.5747440273037542,
'p': 0.41234084231145934,
'r': 0.9481981981981982},
{'a': 0.47301736274049744,
'f1': 0.488848429676832,
'p': 0.3312769895126465,
'r': 0.9322916666666666}],
'www.foxnews.com;2000': [{'a': 0.9264705882352942,
'f1': 0.8852459016393442,
'p': 0.8148893360160966,
'r': 0.9688995215311005},
{'a': 0.8859709744298548,
'f1': 0.729950900163666,
'p': 0.6043360433604336,
'r': 0.9214876033057852},
{'a': 0.9341161121983039,
'f1': 0.9105403011514615,
'p': 0.8538205980066446,
'r': 0.9753320683111955},
{'a': 0.8595588235294118,
'f1': 0.8409658617818485,
'p': 0.7829457364341085,
'r': 0.908273381294964},
{'a': 0.9171270718232044,
'f1': 0.8358862144420133,
'p': 0.7262357414448669,
'r': 0.9845360824742269},
{'a': 0.9072489869428185,
'f1': 0.9286703601108033,
'p': 0.879344262295082,
'r': 0.9838591342626559},
{'a': 0.774294670846395,
'f1': 0.7842157842157843,
'p': 0.6658184902459712,
'r': 0.9538274605103281},
{'a': 0.9191039203484754,
'f1': 0.8958333333333334,
'p': 0.8196480938416423,
'r': 0.9876325088339223},
{'a': 0.9376114081996435,
'f1': 0.9482095294465818,
'p': 0.9308541545613016,
'r': 0.9662243667068757},
{'a': 0.7267960146827478,
'f1': 0.6424159231297186,
'p': 0.4854771784232365,
'r': 0.949290060851927},
{'a': 0.9351620947630923,
'f1': 0.8414634146341464,
'p': 0.7419354838709677,
'r': 0.971830985915493},
{'a': 0.8618753509264458,
'f1': 0.8183161004431314,
'p': 0.7102564102564103,
'r': 0.9651567944250871},
{'a': 0.9335347432024169,
'f1': 0.8784530386740332,
'p': 0.8153846153846154,
'r': 0.9520958083832335},
{'a': 0.9410112359550562,
'f1': 0.9094827586206897,
'p': 0.8423153692614771,
'r': 0.9882903981264637},
{'a': 0.2631578947368421,
'f1': 0.3,
'p': 0.17647058823529413,
'r': 1.0},
{'a': 0.8822149225715626,
'f1': 0.8766584766584767,
'p': 0.7971403038427167,
'r': 0.9737991266375546},
{'a': 0.8908134859263842,
'f1': 0.8866045615162224,
'p': 0.8263473053892215,
'r': 0.9563409563409564},
{'a': 0.9141274238227147,
'f1': 0.9042618900555899,
'p': 0.8591549295774648,
'r': 0.954367666232073},
{'a': 0.9432773109243697,
'f1': 0.9137380191693291,
'p': 0.8545816733067729,
'r': 0.9816933638443935},
{'a': 0.9842942756767927,
'f1': 0.9915574316818484,
'p': 0.9919982218270726,
'r': 0.9911170330890517},
{'a': 0.9413938753959873,
'f1': 0.939706681151548,
'p': 0.9086134453781513,
'r': 0.9730033745781778},
{'a': 0.8820598006644518,
'f1': 0.9093231162196678,
'p': 0.8506571087216248,
'r': 0.9766803840877915},
{'a': 0.9623188405797102,
'f1': 0.9720279720279721,
'p': 0.9683815648445874,
'r': 0.9757019438444925},
{'a': 0.9213724088634739,
'f1': 0.8708920187793427,
'p': 0.7893617021276595,
'r': 0.9712041884816754},
{'a': 0.9498090561920349,
'f1': 0.9358437935843794,
'p': 0.9129251700680272,
'r': 0.9599427753934192}],
'www.foxnews.com;2005': [{'a': 0.5945249597423511,
'f1': 0.6647137150466046,
'p': 0.5012048192771085,
'r': 0.9865612648221344},
{'a': 0.42956579383493537,
'f1': 0.45347729437916795,
'p': 0.2955298013245033,
'r': 0.9740791268758526},
{'a': 0.45368620037807184,
'f1': 0.498554077501446,
'p': 0.33475728155339807,
'r': 0.9762174405436014},
{'a': 0.46420472951085195,
'f1': 0.5100710900473933,
'p': 0.34620024125452353,
'r': 0.968503937007874},
{'a': 0.40185581727337616,
'f1': 0.3909883720930233,
'p': 0.24510250569476083,
'r': 0.9658886894075404},
{'a': 0.34892638036809814,
'f1': 0.26493506493506497,
'p': 0.1536144578313253,
'r': 0.9622641509433962},
{'a': 0.5415335463258786,
'f1': 0.6067415730337079,
'p': 0.4392857142857143,
'r': 0.9805137289636847},
{'a': 0.594211059804032,
'f1': 0.37371806014253434,
'p': 0.23915461624026696,
'r': 0.8545310015898251},
{'a': 0.7491712707182321,
'f1': 0.8370683467581147,
'p': 0.7308174348976466,
'r': 0.9794699514744307},
{'a': 0.5093998553868402,
'f1': 0.2987080103359173,
'p': 0.17762753534111864,
'r': 0.9383116883116883},
{'a': 0.5293867120954003,
'f1': 0.5454545454545454,
'p': 0.3792906178489702,
'r': 0.9707174231332357},
{'a': 0.43463579353593823,
'f1': 0.34525139664804466,
'p': 0.21020408163265306,
'r': 0.965625},
{'a': 0.4492753623188406,
'f1': 0.4169675090252708,
'p': 0.2661290322580645,
'r': 0.9625},
{'a': 0.3810359964881475,
'f1': 0.2914572864321608,
'p': 0.17231134878193702,
'r': 0.9446254071661238},
{'a': 0.46642995480955457,
'f1': 0.5125331760542612,
'p': 0.34746101559376247,
'r': 0.9764044943820225},
{'a': 0.44645745713361373,
'f1': 0.484483278095812,
'p': 0.32250300842358604,
'r': 0.9733656174334141},
{'a': 0.4555626598465473,
'f1': 0.49719515795689406,
'p': 0.333201424614167,
'r': 0.9790697674418605},
{'a': 0.6765451355039969,
'f1': 0.7382867960246096,
'p': 0.5918057663125948,
'r': 0.9811320754716981},
{'a': 0.38055883510428967,
'f1': 0.32155172413793104,
'p': 0.19286452947259566,
'r': 0.966321243523316},
{'a': 0.39348219674109836,
'f1': 0.41637630662020914,
'p': 0.26535899333826796,
'r': 0.9663072776280324},
{'a': 0.6564003849855631,
'f1': 0.6965145933692265,
'p': 0.5462222222222223,
'r': 0.9609069585613761},
{'a': 0.45029051000645576,
'f1': 0.48812744214006615,
'p': 0.3258426966292135,
'r': 0.9724550898203593},
{'a': 0.3883287503819126,
'f1': 0.40593471810089027,
'p': 0.2565641410352588,
'r': 0.9715909090909091},
{'a': 0.5090241664117467,
'f1': 0.5096241979835013,
'p': 0.3469217970049917,
'r': 0.9597238204833142},
{'a': 0.454574951330305,
'f1': 0.49321676213445886,
'p': 0.3305050505050505,
'r': 0.9714964370546318}],
'www.foxnews.com;2010': [{'a': 0.6715490457168221,
'f1': 0.30970149253731344,
'p': 0.1914648212226067,
'r': 0.8097560975609757},
{'a': 0.4688393367638651,
'f1': 0.4962039045553145,
'p': 0.3397697734868177,
'r': 0.9195979899497487},
{'a': 0.22670949977053695,
'f1': 0.17035942885278188,
'p': 0.0939451534075482,
'r': 0.9129287598944591},
{'a': 0.6684587813620072,
'f1': 0.2644135188866799,
'p': 0.15947242206235013,
'r': 0.7732558139534884},
{'a': 0.6863241678726484,
'f1': 0.5126475548060708,
'p': 0.36893203883495146,
'r': 0.8397790055248618},
{'a': 0.46263864565090485,
'f1': 0.47684001136686555,
'p': 0.32418856259659967,
'r': 0.9011815252416756},
{'a': 0.4190779014308426,
'f1': 0.3645217391304348,
'p': 0.22912111937035418,
'r': 0.891156462585034},
{'a': 0.41920152091254753,
'f1': 0.39564787339268054,
'p': 0.25510204081632654,
'r': 0.8810572687224669},
{'a': 0.6591154261057174,
'f1': 0.46979865771812085,
'p': 0.32407407407407407,
'r': 0.8536585365853658},
{'a': 0.4547301218804411,
'f1': 0.4488119683191552,
'p': 0.3009441384736428,
'r': 0.8823529411764706},
{'a': 0.3645761543762922,
'f1': 0.26533864541832675,
'p': 0.1574468085106383,
'r': 0.8430379746835444},
{'a': 0.353988603988604,
'f1': 0.226768968456948,
'p': 0.13187902825979178,
'r': 0.8085106382978723},
{'a': 0.6649844720496895,
'f1': 0.4149152542372881,
'p': 0.2820276497695853,
'r': 0.7846153846153846},
{'a': 0.7508261731658956,
'f1': 0.7082043343653249,
'p': 0.5725907384230288,
'r': 0.9279918864097363},
{'a': 0.7042936288088643,
'f1': 0.6038961038961039,
'p': 0.4533426183844011,
'r': 0.9041666666666667},
{'a': 0.691169671752196,
'f1': 0.30416666666666664,
'p': 0.18646232439335889,
'r': 0.8248587570621468},
{'a': 0.4172043010752688,
'f1': 0.39528211667197966,
'p': 0.25524907369287775,
'r': 0.8757062146892656},
{'a': 0.7634241245136186,
'f1': 0.776908023483366,
'p': 0.6597424179476526,
'r': 0.9446757882212968},
{'a': 0.672885032537961,
'f1': 0.3157894736842105,
'p': 0.19863013698630136,
'r': 0.7699115044247787},
{'a': 0.7183054062598798,
'f1': 0.6631379962192817,
'p': 0.5204747774480712,
'r': 0.9135416666666667},
{'a': 0.6908192671881432,
'f1': 0.4522246535375638,
'p': 0.3066271018793274,
'r': 0.8611111111111112},
{'a': 0.37761294672960216,
'f1': 0.3007575757575757,
'p': 0.18169336384439358,
'r': 0.8725274725274725},
{'a': 0.7358695652173913,
'f1': 0.7344262295081968,
'p': 0.6092475067996374,
'r': 0.9243466299862448},
{'a': 0.3785095320623917,
'f1': 0.2532278217409413,
'p': 0.14887365328109697,
'r': 0.8467966573816156},
{'a': 0.7287234042553191,
'f1': 0.6998037933289731,
'p': 0.5706666666666667,
'r': 0.904480135249366}],
'www.foxnews.com;2015': [{'a': 0.7745159275452842,
'f1': 0.8359836437982735,
'p': 0.7437348423605498,
'r': 0.9543568464730291},
{'a': 0.7285714285714285,
'f1': 0.6603098927294399,
'p': 0.5506958250497018,
'r': 0.8244047619047619},
{'a': 0.5466297322253001,
'f1': 0.38238993710691827,
'p': 0.24675324675324675,
'r': 0.8491620111731844},
{'a': 0.6464646464646465,
'f1': 0.5614035087719298,
'p': 0.41025641025641024,
'r': 0.8888888888888888},
{'a': 0.6268199233716475,
'f1': 0.6088353413654619,
'p': 0.45172824791418353,
'r': 0.9334975369458128},
{'a': 0.6711003627569528,
'f1': 0.5310344827586208,
'p': 0.375609756097561,
'r': 0.9058823529411765},
{'a': 0.6937229437229437,
'f1': 0.6271409749670619,
'p': 0.4731610337972167,
'r': 0.9296875},
{'a': 0.5841081994928149,
'f1': 0.5060240963855422,
'p': 0.35492957746478876,
'r': 0.8811188811188811},
{'a': 0.7967637540453074,
'f1': 0.8243847874720358,
'p': 0.7347956131605184,
'r': 0.9388535031847134},
{'a': 0.5411167512690356,
'f1': 0.29595015576323985,
'p': 0.1806083650190114,
'r': 0.8189655172413793},
{'a': 0.7565217391304347,
'f1': 0.7254901960784315,
'p': 0.6095551894563427,
'r': 0.8958837772397095},
{'a': 0.6686967113276492,
'f1': 0.6243093922651933,
'p': 0.465979381443299,
'r': 0.9456066945606695},
{'a': 0.6580278128950695,
'f1': 0.6896156052782559,
'p': 0.5429087624209575,
'r': 0.9449685534591195},
{'a': 0.739424703891709,
'f1': 0.7535999999999999,
'p': 0.6263297872340425,
'r': 0.9457831325301205},
{'a': 0.7830310880829016,
'f1': 0.8414576431613819,
'p': 0.7458053691275168,
'r': 0.9652551574375678},
{'a': 0.7962206332992849,
'f1': 0.8488063660477455,
'p': 0.775623268698061,
'r': 0.9372384937238494},
{'a': 0.611185086551265,
'f1': 0.4982817869415807,
'p': 0.34441805225653205,
'r': 0.9006211180124224},
{'a': 0.7938701923076923,
'f1': 0.8536064874093042,
'p': 0.7651109410864575,
'r': 0.9652509652509652},
{'a': 0.7580645161290323,
'f1': 0.7636103151862464,
'p': 0.6604708798017348,
'r': 0.9049235993208828},
{'a': 0.7432727272727273,
'f1': 0.7997731140102099,
'p': 0.6844660194174758,
'r': 0.9618008185538881},
{'a': 0.7615971814445097,
'f1': 0.777656078860898,
'p': 0.7157258064516129,
'r': 0.8513189448441247},
{'a': 0.7480586712683348,
'f1': 0.7614379084967321,
'p': 0.6383561643835617,
'r': 0.9433198380566802},
{'a': 0.7497708524289642,
'f1': 0.7016393442622951,
'p': 0.5857664233576643,
'r': 0.8746594005449592},
{'a': 0.7243835616438357,
'f1': 0.77851166886834,
'p': 0.6601941747572816,
'r': 0.9484978540772532},
{'a': 0.6572008113590264,
'f1': 0.5917874396135265,
'p': 0.43672014260249553,
'r': 0.9176029962546817},
{'a': 0.6946610677864428,
'f1': 0.7383033419023136,
'p': 0.6059071729957806,
'r': 0.9447368421052632}],
'www.latimes.com;2000': [{'a': 0.8453548759376803,
'f1': 0.8790613718411553,
'p': 0.8069594034797017,
'r': 0.9653121902874133},
{'a': 0.8693647540983607,
'f1': 0.9046728971962618,
'p': 0.8437935843793585,
'r': 0.9750201450443191},
{'a': 0.7479674796747967,
'f1': 0.6545961002785514,
'p': 0.5075593952483801,
'r': 0.9215686274509803},
{'a': 0.8178861788617886,
'f1': 0.8691588785046729,
'p': 0.7856388595564942,
'r': 0.9725490196078431},
{'a': 0.8781540504648074,
'f1': 0.9213290460878886,
'p': 0.8682828282828283,
'r': 0.9812785388127854},
{'a': 0.8349256900212314,
'f1': 0.8688317165752848,
'p': 0.7996894409937888,
'r': 0.951061865189289},
{'a': 0.8299029126213592,
'f1': 0.8649815043156596,
'p': 0.8007990867579908,
'r': 0.9403485254691689},
{'a': 0.5979413599500936,
'f1': 0.6749054224464061,
'p': 0.5150115473441108,
'r': 0.9787856620336504},
{'a': 0.8804824561403509,
'f1': 0.8935893263911487,
'p': 0.8412990196078431,
'r': 0.9528105482303956},
{'a': 0.7932367149758454,
'f1': 0.7562642369020501,
'p': 0.6347992351816444,
'r': 0.9352112676056338},
{'a': 0.7571371129875352,
'f1': 0.7440677966101695,
'p': 0.6021947873799726,
'r': 0.9733924611973392},
{'a': 0.8675252989880404,
'f1': 0.9152608866222048,
'p': 0.8570903747244673,
'r': 0.9819023569023569},
{'a': 0.7767714437932871,
'f1': 0.8091116173120729,
'p': 0.7086991221069433,
'r': 0.9426751592356688},
{'a': 0.8514308114464916,
'f1': 0.896419786827002,
'p': 0.8414571575166753,
'r': 0.9590643274853801},
{'a': 0.8480890179003386,
'f1': 0.8884943181818182,
'p': 0.8208661417322834,
'r': 0.968266253869969},
{'a': 0.8571428571428571,
'f1': 0.8954562246028814,
'p': 0.8278688524590164,
'r': 0.9750603378921963},
{'a': 0.8526698350573106,
'f1': 0.9063111111111111,
'p': 0.8471252907942838,
'r': 0.974388379204893},
{'a': 0.8485680659081993,
'f1': 0.8810110974106042,
'p': 0.8222094361334867,
'r': 0.9488711819389111},
{'a': 0.830423940149626,
'f1': 0.8875767595654227,
'p': 0.8148308759757156,
'r': 0.9745850622406639},
{'a': 0.8505882352941176,
'f1': 0.8957592339261286,
'p': 0.8399179066187789,
'r': 0.9595545134818289},
{'a': 0.8473091364205256,
'f1': 0.8936664729808251,
'p': 0.8277717976318623,
'r': 0.9709595959595959},
{'a': 0.8113207547169812,
'f1': 0.8582848837209301,
'p': 0.7703848662752772,
'r': 0.9688269073010665},
{'a': 0.8018092105263158,
'f1': 0.7853962600178094,
'p': 0.6879875195007801,
'r': 0.9149377593360996},
{'a': 0.8428701180744778,
'f1': 0.8875162548764629,
'p': 0.8149253731343283,
'r': 0.974304068522484},
{'a': 0.8688235294117647,
'f1': 0.9166355140186916,
'p': 0.8600491055769905,
'r': 0.9811924769907964}],
'www.latimes.com;2005': [{'a': 0.7144944476914086,
'f1': 0.7110322389825496,
'p': 0.5789980732177264,
'r': 0.9210727969348659},
{'a': 0.5932407966203983,
'f1': 0.49851190476190477,
'p': 0.34643226473629785,
'r': 0.8885941644562334},
{'a': 0.723404255319149,
'f1': 0.7098426637394805,
'p': 0.5715969357690042,
'r': 0.9362934362934363},
{'a': 0.7397157816005984,
'f1': 0.7070707070707071,
'p': 0.5691056910569106,
'r': 0.9333333333333333},
{'a': 0.7457142857142857,
'f1': 0.692467173462336,
'p': 0.5529801324503312,
'r': 0.9260628465804066},
{'a': 0.8725598526703499,
'f1': 0.9085623678646935,
'p': 0.8612224448897795,
'r': 0.9614093959731543},
{'a': 0.8076045627376426,
'f1': 0.7068366164542296,
'p': 0.5888030888030888,
'r': 0.8840579710144928},
{'a': 0.6728395061728395,
'f1': 0.7034688549048862,
'p': 0.5670475045099218,
'r': 0.9263261296660118},
{'a': 0.7836746466685895,
'f1': 0.8155435317265126,
'p': 0.709456568249893,
'r': 0.9589358010410642},
{'a': 0.7421426157485637,
'f1': 0.8012503256056265,
'p': 0.6931050022532672,
'r': 0.9493827160493827},
{'a': 0.7032887603903144,
'f1': 0.7242190124286193,
'p': 0.5893931109896118,
'r': 0.9390243902439024},
{'a': 0.8528057308545114,
'f1': 0.8973230220107079,
'p': 0.838,
'r': 0.9656850192061459},
{'a': 0.718705035971223,
'f1': 0.6864474739374499,
'p': 0.5434920634920635,
'r': 0.9314472252448314},
{'a': 0.7126022913256956,
'f1': 0.704177897574124,
'p': 0.5808782657031685,
'r': 0.8939264328485885},
{'a': 0.7624615384615384,
'f1': 0.7617283950617285,
'p': 0.6437141366718832,
'r': 0.9327286470143613},
{'a': 0.7425531914893617,
'f1': 0.705596107055961,
'p': 0.571992110453649,
'r': 0.9206349206349206},
{'a': 0.7760375880971025,
'f1': 0.811965811965812,
'p': 0.7105868814729575,
'r': 0.9470858895705522},
{'a': 0.741,
'f1': 0.7106145251396647,
'p': 0.5824175824175825,
'r': 0.9111747851002865},
{'a': 0.645644283121597,
'f1': 0.6694879390605162,
'p': 0.5203947368421052,
'r': 0.9383155397390273},
{'a': 0.5956842703009654,
'f1': 0.375438596491228,
'p': 0.2420814479638009,
'r': 0.8359375},
{'a': 0.6757904672015101,
'f1': 0.6945309026233882,
'p': 0.5496129486277269,
'r': 0.9432367149758454},
{'a': 0.7909712722298221,
'f1': 0.8473831402317219,
'p': 0.7550729797080812,
'r': 0.9654073736913974},
{'a': 0.9035483870967742,
'f1': 0.9223174850610549,
'p': 0.8942065491183879,
'r': 0.9522532188841202},
{'a': 0.7048286604361371,
'f1': 0.734965034965035,
'p': 0.5995436394751854,
'r': 0.9494128274616079},
{'a': 0.5814771395076201,
'f1': 0.3532608695652174,
'p': 0.22209567198177677,
'r': 0.8628318584070797}],
'www.latimes.com;2010': [{'a': 0.6434885091337654,
'f1': 0.5406226271829916,
'p': 0.4052361980648833,
'r': 0.8118586088939567},
{'a': 0.5513347022587269,
'f1': 0.44331210191082804,
'p': 0.3040629095674967,
'r': 0.81786133960047},
{'a': 0.584457061745919,
'f1': 0.35124653739612194,
'p': 0.22855082912761354,
'r': 0.7583732057416268},
{'a': 0.5835829838226483,
'f1': 0.42179700499168055,
'p': 0.2887243735763098,
'r': 0.7824074074074074},
{'a': 0.8558427375318529,
'f1': 0.86843853820598,
'p': 0.818922305764411,
'r': 0.9243281471004243},
{'a': 0.8492879746835443,
'f1': 0.8515777171795871,
'p': 0.7937545388525781,
'r': 0.9184873949579831},
{'a': 0.5744389709906951,
'f1': 0.4821844821844822,
'p': 0.3395872420262664,
'r': 0.8312284730195177},
{'a': 0.6701839826839827,
'f1': 0.45213483146067424,
'p': 0.3179519595448799,
'r': 0.7822706065318819},
{'a': 0.8363713437897414,
'f1': 0.8275245755138516,
'p': 0.763396537510305,
'r': 0.9034146341463415},
{'a': 0.8205383848454636,
'f1': 0.7707006369426752,
'p': 0.6774916013437849,
'r': 0.8936484490398818},
{'a': 0.6184008200922604,
'f1': 0.15732880588568196,
'p': 0.08904548366431775,
'r': 0.6747572815533981},
{'a': 0.8341211860764933,
'f1': 0.8232600732600732,
'p': 0.7548278757346767,
'r': 0.905337361530715},
{'a': 0.6610064834825563,
'f1': 0.5525672371638142,
'p': 0.4200743494423792,
'r': 0.8071428571428572},
{'a': 0.6263546798029557,
'f1': 0.5384849406753879,
'p': 0.4085872576177285,
'r': 0.7894736842105263},
{'a': 0.6586608442503639,
'f1': 0.42781618544123623,
'p': 0.29567172568858907,
'r': 0.7735294117647059},
{'a': 0.6219718309859155,
'f1': 0.2966457023060796,
'p': 0.18460534898891062,
'r': 0.7546666666666667},
{'a': 0.7410400562192551,
'f1': 0.5514303104077906,
'p': 0.40338379341050756,
'r': 0.8711538461538462},
{'a': 0.6353760445682451,
'f1': 0.551558752997602,
'p': 0.4164511122607346,
'r': 0.8164300202839757},
{'a': 0.49702675916749256,
'f1': 0.3636363636363636,
'p': 0.23396530859217426,
'r': 0.8157524613220816},
{'a': 0.6007430694484138,
'f1': 0.4804760133878765,
'p': 0.34143763213530653,
'r': 0.8105395232120451},
{'a': 0.8395931142410016,
'f1': 0.8439878234398783,
'p': 0.7882018479033405,
'r': 0.9082719082719083},
{'a': 0.577935522034901,
'f1': 0.4685288640595904,
'p': 0.3255693581780538,
'r': 0.8353253652058433},
{'a': 0.6511403790555734,
'f1': 0.5164737310774711,
'p': 0.38334434897554526,
'r': 0.791268758526603},
{'a': 0.835229858898498,
'f1': 0.8126293995859214,
'p': 0.7309124767225326,
'r': 0.914918414918415},
{'a': 0.6451033243486074,
'f1': 0.22599608099281515,
'p': 0.13494539781591264,
'r': 0.6947791164658634}],
'www.latimes.com;2015': [{'a': 0.5961266610121572,
'f1': 0.17212402202260216,
'p': 0.0963035019455253,
'r': 0.8092643051771117},
{'a': 0.6113038450419196,
'f1': 0.17692072237526785,
'p': 0.09917638984214139,
'r': 0.8186968838526912},
{'a': 0.5981166043484282,
'f1': 0.1965669988925803,
'p': 0.11145996860282574,
'r': 0.8313817330210773},
{'a': 0.31709791983764585,
'f1': 0.3525733525733526,
'p': 0.21874067442554462,
'r': 0.9083023543990086},
{'a': 0.2518767797048926,
'f1': 0.21934089681253374,
'p': 0.12608695652173912,
'r': 0.8423236514522822},
{'a': 0.6048069919883466,
'f1': 0.13900349095525233,
'p': 0.07580477673935618,
'r': 0.8358778625954199},
{'a': 0.33791469194312795,
'f1': 0.3923444976076555,
'p': 0.25076452599388377,
'r': 0.9010989010989011},
{'a': 0.607795545402627,
'f1': 0.18943641192092062,
'p': 0.10667996011964108,
'r': 0.8447368421052631},
{'a': 0.5982575024201355,
'f1': 0.20082530949105915,
'p': 0.11416953393806693,
'r': 0.8333333333333334},
{'a': 0.608915502328676,
'f1': 0.29503478052290716,
'p': 0.1777456647398844,
'r': 0.8674188998589563},
{'a': 0.5890063424947146,
'f1': 0.12327119663259171,
'p': 0.06712508185985593,
'r': 0.7536764705882353},
{'a': 0.5902777777777778,
'f1': 0.13624141021810576,
'p': 0.07433974567981741,
'r': 0.8142857142857143},
{'a': 0.2543318649045521,
'f1': 0.19371228961575104,
'p': 0.11026753434562545,
'r': 0.7963446475195822},
{'a': 0.6019872249822569,
'f1': 0.1762632197414806,
'p': 0.09874917709019092,
'r': 0.819672131147541},
{'a': 0.6036972848064702,
'f1': 0.15931372549019607,
'p': 0.08807588075880758,
'r': 0.8333333333333334},
{'a': 0.6040249022730563,
'f1': 0.14930015552099532,
'p': 0.08241758241758242,
'r': 0.7920792079207921},
{'a': 0.6103078024337867,
'f1': 0.18502994011976048,
'p': 0.1043918918918919,
'r': 0.8131578947368421},
{'a': 0.6041463754000278,
'f1': 0.2099416828658706,
'p': 0.12088263511352734,
'r': 0.7974683544303798},
{'a': 0.6030157642220699,
'f1': 0.2301967038809144,
'p': 0.13388991960420532,
'r': 0.8200757575757576},
{'a': 0.25313531353135316,
'f1': 0.19494841693347564,
'p': 0.10973167801361634,
'r': 0.8726114649681529},
{'a': 0.613965370422935,
'f1': 0.22639362912400457,
'p': 0.13002286834367854,
'r': 0.8747252747252747},
{'a': 0.6122367898291617,
'f1': 0.31620737972909857,
'p': 0.1924936025021325,
'r': 0.8849673202614379},
{'a': 0.28397508493771234,
'f1': 0.27097146151628715,
'p': 0.16084873374401096,
'r': 0.8592321755027422},
{'a': 0.22352004247411733,
'f1': 0.1881765195670275,
'p': 0.10587133041848844,
'r': 0.8453865336658354},
{'a': 0.2530933633295838,
'f1': 0.20952380952380953,
'p': 0.11972789115646258,
'r': 0.8380952380952381}],
'www.nymag.com;2000': [{'a': 0.9022515101592532,
'f1': 0.9079627714581179,
'p': 0.8582600195503421,
'r': 0.9637760702524698},
{'a': 0.9118967452300786,
'f1': 0.9150892374256355,
'p': 0.869475847893114,
'r': 0.9657534246575342},
{'a': 0.9091988130563798,
'f1': 0.9208484221417486,
'p': 0.8820614469772051,
'r': 0.9632034632034632},
{'a': 0.9014423076923077,
'f1': 0.8829686013320647,
'p': 0.8197879858657244,
'r': 0.9567010309278351},
{'a': 0.8762006403415155,
'f1': 0.7433628318584071,
'p': 0.6176470588235294,
'r': 0.9333333333333333},
{'a': 0.9168218497827436,
'f1': 0.925720620842572,
'p': 0.8882978723404256,
'r': 0.9664351851851852},
{'a': 0.9385113268608414,
'f1': 0.9563469270534175,
'p': 0.9412097230073487,
'r': 0.9719789842381786},
{'a': 0.9100850546780073,
'f1': 0.9193020719738276,
'p': 0.8817991631799164,
'r': 0.9601366742596811},
{'a': 0.9090372120496161,
'f1': 0.9200415368639667,
'p': 0.8842315369261478,
'r': 0.9588744588744589},
{'a': 0.4936023622047244,
'f1': 0.4248183342649524,
'p': 0.2779809802487198,
'r': 0.9004739336492891},
{'a': 0.8866719872306464,
'f1': 0.8667917448405255,
'p': 0.791095890410959,
'r': 0.9585062240663901},
{'a': 0.5929260450160772,
'f1': 0.48157248157248156,
'p': 0.3277591973244147,
'r': 0.9074074074074074},
{'a': 0.9185850645704661,
'f1': 0.9312470365101944,
'p': 0.9000916590284143,
'r': 0.9646365422396856},
{'a': 0.9226069246435845,
'f1': 0.9392486011191048,
'p': 0.9215686274509803,
'r': 0.9576202118989405},
{'a': 0.9131886477462438,
'f1': 0.926829268292683,
'p': 0.8853046594982079,
'r': 0.9724409448818898},
{'a': 0.9195822454308094,
'f1': 0.9354568315171836,
'p': 0.8971061093247589,
'r': 0.9772329246935202},
{'a': 0.594225721784777,
'f1': 0.5669467787114846,
'p': 0.4067524115755627,
'r': 0.9353049907578558},
{'a': 0.8875878220140515,
'f1': 0.8313817330210772,
'p': 0.7505285412262156,
'r': 0.931758530183727},
{'a': 0.9100055897149245,
'f1': 0.9131138694009715,
'p': 0.8775933609958506,
'r': 0.9516310461192351},
{'a': 0.8948513415518492,
'f1': 0.8706512042818911,
'p': 0.7973856209150327,
'r': 0.9587426326129665},
{'a': 0.9004065040650406,
'f1': 0.9014084507042254,
'p': 0.8474148802017655,
'r': 0.9627507163323782},
{'a': 0.8974970202622169,
'f1': 0.8967587034813924,
'p': 0.8346368715083798,
'r': 0.9688715953307393},
{'a': 0.8960434445306439,
'f1': 0.8826619964973731,
'p': 0.8168557536466775,
'r': 0.96},
{'a': 0.8713725490196078,
'f1': 0.8056872037914692,
'p': 0.718816067653277,
'r': 0.9164420485175202},
{'a': 0.8947745168217609,
'f1': 0.8940158615717376,
'p': 0.850480109739369,
'r': 0.9422492401215805}],
'www.nymag.com;2005': [{'a': 0.6452380952380953,
'f1': 0.6779074794638996,
'p': 0.5216234198270127,
'r': 0.9679012345679012},
{'a': 0.6519631604459525,
'f1': 0.6864628820960699,
'p': 0.5321597833446174,
'r': 0.966789667896679},
{'a': 0.6338983050847458,
'f1': 0.6612903225806452,
'p': 0.502724795640327,
'r': 0.9659685863874345},
{'a': 0.6628511966701353,
'f1': 0.6845180136319376,
'p': 0.5289691497366441,
'r': 0.9696551724137931},
{'a': 0.6602497398543185,
'f1': 0.6813079551000488,
'p': 0.5256024096385542,
'r': 0.9680998613037448},
{'a': 0.6131095123900879,
'f1': 0.4989648033126294,
'p': 0.34184397163120567,
'r': 0.9233716475095786},
{'a': 0.6332203389830509,
'f1': 0.6048210372534697,
'p': 0.44612068965517243,
'r': 0.9387755102040817},
{'a': 0.6230730979612134,
'f1': 0.6721453287197232,
'p': 0.5159362549800797,
'r': 0.9640198511166254},
{'a': 0.6969339622641509,
'f1': 0.7218614718614719,
'p': 0.5815170008718396,
'r': 0.9514978601997147},
{'a': 0.7438320209973753,
'f1': 0.7874564459930313,
'p': 0.6681448632668144,
'r': 0.95864262990456},
{'a': 0.723136495643756,
'f1': 0.7700964630225081,
'p': 0.6459878624409979,
'r': 0.9532338308457712},
{'a': 0.7215815485996705,
'f1': 0.7895392278953923,
'p': 0.6642221058145626,
'r': 0.9731389102072141},
{'a': 0.5917297612114153,
'f1': 0.5566097406704617,
'p': 0.39391226499552373,
'r': 0.9482758620689655},
{'a': 0.5909980430528375,
'f1': 0.6368375325803649,
'p': 0.476592977893368,
'r': 0.9594240837696335},
{'a': 0.6182612918892666,
'f1': 0.6711297071129707,
'p': 0.514102564102564,
'r': 0.9662650602409638},
{'a': 0.6560942455822383,
'f1': 0.7198228128460686,
'p': 0.5731922398589065,
'r': 0.9672619047619048},
{'a': 0.7583559168925023,
'f1': 0.8118185015828351,
'p': 0.6998180715585203,
'r': 0.966499162479062},
{'a': 0.6424065987384765,
'f1': 0.6754733597534126,
'p': 0.5182432432432432,
'r': 0.9696586599241467},
{'a': 0.6499768196569309,
'f1': 0.711501719526175,
'p': 0.5598316295850871,
'r': 0.9758909853249476},
{'a': 0.6398410332836563,
'f1': 0.6684956561499771,
'p': 0.5094076655052264,
'r': 0.9720744680851063},
{'a': 0.6838709677419355,
'f1': 0.7375937165298108,
'p': 0.5947035118019574,
'r': 0.9708646616541353},
{'a': 0.6774033696729435,
'f1': 0.7107952021323856,
'p': 0.5625879043600562,
'r': 0.9650180940892642},
{'a': 0.5855472901168969,
'f1': 0.6180215475024485,
'p': 0.45526695526695526,
'r': 0.961890243902439},
{'a': 0.6112852664576802,
'f1': 0.5059760956175299,
'p': 0.35082872928176795,
'r': 0.9071428571428571},
{'a': 0.5864067439409906,
'f1': 0.6205896568390528,
'p': 0.4585714285714286,
'r': 0.9596412556053812},
{'a': 0.7295918367346939,
'f1': 0.766107678729038,
'p': 0.6458333333333334,
'r': 0.9414316702819957}],
'www.nymag.com;2010': [{'a': 0.610137073637233,
'f1': 0.2583383869011522,
'p': 0.1552478134110787,
'r': 0.7689530685920578},
{'a': 0.5996937212863706,
'f1': 0.4088647670737223,
'p': 0.27410551849605824,
'r': 0.8042704626334519},
{'a': 0.5323641398407754,
'f1': 0.09993337774816789,
'p': 0.053763440860215055,
'r': 0.7075471698113207},
{'a': 0.5396056727775856,
'f1': 0.13289902280130295,
'p': 0.07264957264957266,
'r': 0.7786259541984732},
{'a': 0.5795157068062827,
'f1': 0.2935678944474986,
'p': 0.1832532601235415,
'r': 0.7375690607734806},
{'a': 0.5422721268163805,
'f1': 0.21160409556313992,
'p': 0.12261041529334213,
'r': 0.7717842323651453},
{'a': 0.5738291026099392,
'f1': 0.14978601997146934,
'p': 0.08306962025316456,
'r': 0.7608695652173914},
{'a': 0.5923586990843069,
'f1': 0.35223281485198193,
'p': 0.22733160621761658,
'r': 0.7817371937639198},
{'a': 0.5419523343127652,
'f1': 0.20056980056980056,
'p': 0.11609498680738786,
'r': 0.7364016736401674},
{'a': 0.5488204456094364,
'f1': 0.19708454810495626,
'p': 0.1135752688172043,
'r': 0.7444933920704846},
{'a': 0.6738836265223275,
'f1': 0.630556974961676,
'p': 0.5,
'r': 0.8533886583679114},
{'a': 0.5518913676042677,
'f1': 0.22569832402234638,
'p': 0.13263296126066973,
'r': 0.7565543071161048},
{'a': 0.5453590192644483,
'f1': 0.13466666666666668,
'p': 0.07383040935672515,
'r': 0.7651515151515151},
{'a': 0.5338837289301686,
'f1': 0.08876933422999327,
'p': 0.04748201438848921,
'r': 0.6804123711340206},
{'a': 0.5486638073243154,
'f1': 0.2262443438914027,
'p': 0.13253810470510272,
'r': 0.7722007722007722},
{'a': 0.6025859135760463,
'f1': 0.24155844155844158,
'p': 0.1449727201870616,
'r': 0.7237354085603113},
{'a': 0.5788934426229508,
'f1': 0.2449479485609308,
'p': 0.145985401459854,
'r': 0.7604562737642585},
{'a': 0.5859914501808615,
'f1': 0.2986072423398329,
'p': 0.18559556786703602,
'r': 0.7635327635327636},
{'a': 0.7148861646234675,
'f1': 0.6052376333656644,
'p': 0.4878811571540266,
'r': 0.7969348659003831},
{'a': 0.6069812540400775,
'f1': 0.17278911564625848,
'p': 0.09822119102861562,
'r': 0.7175141242937854},
{'a': 0.6197007481296758,
'f1': 0.4123314065510597,
'p': 0.27992151733158926,
'r': 0.7824497257769653},
{'a': 0.5450129533678757,
'f1': 0.2326597487711633,
'p': 0.1383116883116883,
'r': 0.7319587628865979},
{'a': 0.5839064649243466,
'f1': 0.24375000000000002,
'p': 0.1448736998514116,
'r': 0.7677165354330708},
{'a': 0.5344352617079889,
'f1': 0.13665389527458494,
'p': 0.07556497175141243,
'r': 0.7133333333333334},
{'a': 0.6615306895680727,
'f1': 0.6655017473789315,
'p': 0.5390214314597654,
'r': 0.8695368558382257}],
'www.nymag.com;2015': [{'a': 0.11785925100019676,
'f1': 0.0046867291563887515,
'p': 0.002350263476905569,
'r': 0.7983193277310925},
{'a': 0.11908450243274496,
'f1': 0.007765845027155882,
'p': 0.003900367819496902,
'r': 0.8681318681318682},
{'a': 0.1261407378573591,
'f1': 0.024808745806887564,
'p': 0.012590016166168618,
'r': 0.8412438625204582},
{'a': 0.167990425416168,
'f1': 0.058888683773306257,
'p': 0.030480619167436378,
'r': 0.8660633484162896},
{'a': 0.2517387360145147,
'f1': 0.25399457340970755,
'p': 0.14703315881326354,
'r': 0.9319690265486725},
{'a': 0.17694978632478633,
'f1': 0.08427684169861223,
'p': 0.0442419893295061,
'r': 0.88625},
{'a': 0.11858546512900191,
'f1': 0.005334123573862795,
'p': 0.0026756515707065704,
'r': 0.8307692307692308},
{'a': 0.12017307357801357,
'f1': 0.010853357451905448,
'p': 0.005462329609763299,
'r': 0.8314606741573034},
{'a': 0.1195611792215739,
'f1': 0.008515036668799528,
'p': 0.0042783658126422,
'r': 0.8737373737373737},
{'a': 0.11900754180784785,
'f1': 0.0065080734623443855,
'p': 0.003267003267003267,
'r': 0.8198757763975155},
{'a': 0.12334629504920383,
'f1': 0.018102630234312266,
'p': 0.009149040826364977,
'r': 0.8473804100227791},
{'a': 0.14935355683662618,
'f1': 0.009062754686226568,
'p': 0.004558423244679107,
'r': 0.7637362637362637},
{'a': 0.11967563325631077,
'f1': 0.008787001129056012,
'p': 0.004416699565732333,
'r': 0.8364485981308412},
{'a': 0.13128675605739826,
'f1': 0.020320997479771855,
'p': 0.010282707332134131,
'r': 0.8549107142857143},
{'a': 0.28787179191196277,
'f1': 0.33165411768954367,
'p': 0.20475776425211245,
'r': 0.8721747388414055},
{'a': 0.12006201821239054,
'f1': 0.009391056370922142,
'p': 0.004722696140246767,
'r': 0.8162393162393162},
{'a': 0.11881144854708324,
'f1': 0.006307282940770671,
'p': 0.0031662791272943154,
'r': 0.7901234567901234},
{'a': 0.6063878676470589,
'f1': 0.7322182272940442,
'p': 0.5841855824395111,
'r': 0.9807370184254607},
{'a': 0.12053396152336082,
'f1': 0.010260690264617801,
'p': 0.005163681284743669,
'r': 0.7946768060836502},
{'a': 0.12112761971155941,
'f1': 0.012387387387387386,
'p': 0.006240134175217048,
'r': 0.8322368421052632},
{'a': 0.15119259919750333,
'f1': 0.014812419146183699,
'p': 0.007479016297070446,
'r': 0.760797342192691},
{'a': 0.15771589590585114,
'f1': 0.031264893718425314,
'p': 0.015923876104476164,
'r': 0.8541666666666666},
{'a': 0.11861266131845134,
'f1': 0.0065361083126520385,
'p': 0.003280629486199156,
'r': 0.8525641025641025},
{'a': 0.11750164077882301,
'f1': 0.0034585834630302126,
'p': 0.0017329306332623657,
'r': 0.8235294117647058},
{'a': 0.11904345162560714,
'f1': 0.006317711803756077,
'p': 0.003170749832792489,
'r': 0.8421052631578947},
{'a': 0.15416851687388988,
'f1': 0.023892643243762612,
'p': 0.01211510978303235,
'r': 0.8574712643678161}]}
[About 4035 more lines. Double-click to unfold]
# Gather the path of every file named exactly "results.pkl" that sits directly
# inside one of the directories yielded by listsubdir() for the wbce-tests
# root.  listsubdir is defined elsewhere in this session.
packages = []
for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests']):
    for filename in os.listdir(f):
        if filename == "results.pkl":
            packages.append(os.path.join(f, filename))
...
def trim_results(domain_path):
    """Group the measurements stored in a results pickle by domain and year.

    The pickle at ``domain_path`` must hold an iterable of dicts.  Every
    value of those dicts must provide the keys ``'domain'``, ``'year'`` and
    ``'measurements'``; ``domain`` and ``year`` are concatenated, so both
    have to be strings.  Each ``measurements`` object is appended to a list
    keyed by ``'<domain>;<year>'``.  The grouped dict is pickled to
    ``trimmed.pkl`` in the directory that contains ``domain_path`` and is
    also returned.

    Args:
        domain_path: Path of the pickle file to read.

    Returns:
        A plain dict mapping ``'<domain>;<year>'`` to the list of
        ``measurements`` objects collected for that pair.

    Raises:
        IOError / OSError: If the input cannot be opened or the output
            cannot be written.
        KeyError: If an entry lacks ``'domain'``, ``'year'`` or
            ``'measurements'``.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only point this
    # at result files produced by our own test runs.
    # Binary mode mirrors the 'wb' used for the output below; text mode can
    # corrupt binary pickles on Windows.  Assumes results.pkl was itself
    # written in binary mode -- TODO confirm against the code that creates it.
    with open(domain_path, 'rb') as source:
        domain_results = pickle.load(source)

    # Single pass: create each bucket the first time its key shows up.
    part = {}
    for domainyear in domain_results:
        for val in domainyear.values():
            bucket = str(val['domain'] + ';' + val['year'])
            part.setdefault(bucket, []).append(val['measurements'])

    # trimmed.pkl is written next to the input file.
    trimmed_path = os.path.join(os.path.dirname(domain_path), 'trimmed.pkl')

    # The context manager guarantees the file is flushed and closed before
    # the function returns.
    with open(trimmed_path, 'wb') as target:
        pickle.dump(part, target)

    return part
...
# Trim every collected results.pkl (each call also writes a trimmed.pkl next
# to its input) and keep the grouped dicts in the same order as `packages`.
trimmed_results = [trim_results(pack) for pack in packages]
>>> len(trimmed_results)
17: 9
>>> len(packages)
18: 9
>>> packages
19: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractorFilter\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\ContentCodeBlurringFilter\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\DocumentSlopeCurveFilter\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorDomFilter\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\FeatureExtractorSplitFilter\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\GeneralCCB\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\KFeatureExtractorDomFilter\\results.pkl',
'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\LinkQuotaFilter\\results.pkl']
>>> trimmed_results
20: [{'entertainment.msn.com;2000': [{'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
{'a': 0.9224137931034483,
'f1': 0.8266199649737302,
'p': 0.8973384030418251,
'r': 0.7662337662337663},
{'a': 0.933852140077821,
'f1': 0.9098939929328622,
'p': 0.9501845018450185,
'r': 0.8728813559322034},
{'a': 0.8989405052974735,
'f1': 0.7847222222222223,
'p': 0.7174603174603175,
'r': 0.8659003831417624},
{'a': 0.927536231884058,
'f1': 0.8558758314855874,
'p': 0.8577777777777778,
'r': 0.8539823008849557},
{'a': 0.9419152276295133,
'f1': 0.87987012987013,
'p': 0.9093959731543624,
'r': 0.8522012578616353},
{'a': 0.9335180055401662,
'f1': 0.916083916083916,
'p': 0.9509981851179673,
'r': 0.8836424957841484},
{'a': 0.8910472972972973,
'f1': 0.7425149700598803,
'p': 0.6813186813186813,
'r': 0.8157894736842105},
{'a': 0.894695170229612,
'f1': 0.8129395218002814,
'p': 0.774798927613941,
'r': 0.8550295857988166},
{'a': 0.8803122289679098,
'f1': 0.676056338028169,
'p': 0.6075949367088608,
'r': 0.7619047619047619},
{'a': 0.9037656903765691,
'f1': 0.8804159445407279,
'p': 0.8581081081081081,
'r': 0.9039145907473309},
{'a': 0.9509632224168126,
'f1': 0.9087947882736157,
'p': 0.9,
'r': 0.9177631578947368},
{'a': 0.9333680374804789,
'f1': 0.9330543933054394,
'p': 0.970620239390642,
'r': 0.8982880161127895},
{'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
{'a': 0.9472981987991995,
'f1': 0.9090909090909091,
'p': 0.9360189573459715,
'r': 0.883668903803132},
{'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
{'a': 0.9454148471615721,
'f1': 0.8898678414096917,
'p': 0.9181818181818182,
'r': 0.8632478632478633},
{'a': 0.8812227074235808,
'f1': 0.7301587301587301,
'p': 0.6789667896678967,
'r': 0.7896995708154506},
{'a': 0.8959484346224678,
'f1': 0.7064935064935064,
'p': 0.6507177033492823,
'r': 0.7727272727272727},
{'a': 0.889644746787604,
'f1': 0.7972222222222222,
'p': 0.7572559366754618,
'r': 0.841642228739003},
{'a': 0.9303030303030303,
'f1': 0.8993435448577681,
'p': 0.9383561643835616,
'r': 0.8634453781512605},
{'a': 0.9379900213827512,
'f1': 0.9186155285313378,
'p': 0.9478764478764479,
'r': 0.8911070780399274},
{'a': 0.9497374343585896,
'f1': 0.9130998702983139,
'p': 0.9263157894736842,
'r': 0.9002557544757033},
{'a': 0.9401555954518253,
'f1': 0.9159663865546219,
'p': 0.9527972027972028,
'r': 0.8818770226537217},
{'a': 0.9470013947001394,
'f1': 0.9573273441886581,
'p': 0.9567901234567902,
'r': 0.9578651685393258}],
'entertainment.msn.com;2005': [{'a': 0.6906686260102866,
'f1': 0.6982078853046595,
'p': 0.5553021664766249,
'r': 0.9401544401544402},
{'a': 0.5468451242829828,
'f1': 0.21000000000000002,
'p': 0.12401574803149606,
'r': 0.6847826086956522},
{'a': 0.49862825788751713,
'f1': 0.23455497382198953,
'p': 0.14267515923566879,
'r': 0.6588235294117647},
{'a': 0.6598006644518273,
'f1': 0.580327868852459,
'p': 0.4270205066344994,
'r': 0.9053708439897699},
{'a': 0.6897289586305279,
'f1': 0.7030716723549487,
'p': 0.5640744797371303,
'r': 0.9329710144927537},
{'a': 0.6415343915343915,
'f1': 0.5893939393939394,
'p': 0.44457142857142856,
'r': 0.8741573033707866},
{'a': 0.6283185840707964,
'f1': 0.5948553054662379,
'p': 0.44417767106842737,
'r': 0.9002433090024331},
{'a': 0.7079343365253078,
'f1': 0.7292327203551046,
'p': 0.5958549222797928,
'r': 0.9395424836601307},
{'a': 0.6803051317614425,
'f1': 0.6069906223358909,
'p': 0.4587628865979381,
'r': 0.8967254408060453},
{'a': 0.8139684583199227,
'f1': 0.8720106288751107,
'p': 0.7942718838241226,
'r': 0.9666175748649976},
{'a': 0.7660757733750434,
'f1': 0.8402563493947306,
'p': 0.7378074197582326,
'r': 0.9757442116868799},
{'a': 0.6724023825281271,
'f1': 0.6583850931677019,
'p': 0.5107066381156317,
'r': 0.9262135922330097},
{'a': 0.6503153468815698,
'f1': 0.6453447050461976,
'p': 0.49780701754385964,
'r': 0.9171717171717172},
{'a': 0.7622252131000449,
'f1': 0.8039940828402367,
'p': 0.7003865979381443,
'r': 0.9435763888888888},
{'a': 0.5560640732265446,
'f1': 0.4203187250996016,
'p': 0.2779973649538867,
'r': 0.8612244897959184},
{'a': 0.6134852801519468,
'f1': 0.36899224806201547,
'p': 0.2389558232931727,
'r': 0.8095238095238095},
{'a': 0.6816109422492401,
'f1': 0.6823351023502654,
'p': 0.5357142857142857,
'r': 0.9394572025052192},
{'a': 0.5220125786163522,
'f1': 0.27790973871733965,
'p': 0.16931982633863965,
'r': 0.7748344370860927},
{'a': 0.6473509933774835,
'f1': 0.5988700564971752,
'p': 0.4428969359331476,
'r': 0.9244186046511628},
{'a': 0.662015503875969,
'f1': 0.6466774716369531,
'p': 0.4962686567164179,
'r': 0.9279069767441861},
{'a': 0.5902621722846442,
'f1': 0.3572267920094007,
'p': 0.22926093514328807,
'r': 0.8085106382978723},
{'a': 0.5920763022743947,
'f1': 0.540495867768595,
'p': 0.38698224852071006,
'r': 0.8958904109589041},
{'a': 0.6629581151832461,
'f1': 0.57190357439734,
'p': 0.4226044226044226,
'r': 0.884318766066838},
{'a': 0.7147041593438781,
'f1': 0.7240793201133144,
'p': 0.5889400921658986,
'r': 0.9397058823529412},
{'a': 0.6313432835820896,
'f1': 0.6085578446909667,
'p': 0.4544378698224852,
'r': 0.920863309352518},
{'a': 0.5412639405204461,
'f1': 0.25392986698911735,
'p': 0.15306122448979592,
'r': 0.7446808510638298},
{'a': 0.6824005394470668,
'f1': 0.6713189113747383,
'p': 0.5320796460176991,
'r': 0.9092627599243857}],
'entertainment.msn.com;2010': [{'a': 0.3428857047650783,
'f1': 0.1658206429780034,
'p': 0.09201877934272301,
'r': 0.8376068376068376},
{'a': 0.45119947420308903,
'f1': 0.42013888888888884,
'p': 0.2742520398912058,
'r': 0.8976261127596439},
{'a': 0.6411235342241615,
'f1': 0.5517711171662125,
'p': 0.4136874361593463,
'r': 0.8282208588957055},
{'a': 0.9039820166987798,
'f1': 0.8546426835196889,
'p': 0.8108856088560885,
'r': 0.9033915724563206},
{'a': 0.3921737260804128,
'f1': 0.34117921230482406,
'p': 0.20938215102974828,
'r': 0.9207547169811321},
{'a': 0.5535641547861507,
'f1': 0.3234567901234568,
'p': 0.19969512195121952,
'r': 0.8506493506493507},
{'a': 0.14695238095238095,
'f1': 0.10955363356198429,
'p': 0.058325394305070395,
'r': 0.9003267973856209},
{'a': 0.7608596250571559,
'f1': 0.6565988181221274,
'p': 0.5488474204171241,
'r': 0.8169934640522876},
{'a': 0.884125920964501,
'f1': 0.7920673076923077,
'p': 0.7289823008849557,
'r': 0.8671052631578947},
{'a': 0.896735273243435,
'f1': 0.8186915887850468,
'p': 0.7595375722543353,
'r': 0.8878378378378379},
{'a': 0.7920924033762772,
'f1': 0.7240566037735848,
'p': 0.6220871327254306,
'r': 0.8660084626234132},
{'a': 0.36601513240857503,
'f1': 0.25046589638464406,
'p': 0.14608695652173914,
'r': 0.8772845953002611},
{'a': 0.1811268579329416,
'f1': 0.1899469994870918,
'p': 0.10574909575480677,
'r': 0.9320469798657718},
{'a': 0.41000352236703064,
'f1': 0.32595573440643866,
'p': 0.20009881422924902,
'r': 0.8785249457700651},
{'a': 0.7624944714727997,
'f1': 0.6670799752014879,
'p': 0.5563598759048604,
'r': 0.8328173374613003},
{'a': 0.4393613754989254,
'f1': 0.42506297229219153,
'p': 0.27439024390243905,
'r': 0.9427374301675978},
{'a': 0.8097795364612775,
'f1': 0.837321730722746,
'p': 0.7646799116997792,
'r': 0.9252136752136753},
{'a': 0.13969136253931105,
'f1': 0.13577253691866875,
'p': 0.07321131447587355,
'r': 0.9333333333333333},
{'a': 0.6126237623762376,
'f1': 0.44503546099290786,
'p': 0.2977461447212337,
'r': 0.8807017543859649},
{'a': 0.7682926829268293,
'f1': 0.7115384615384617,
'p': 0.6161262050832603,
'r': 0.8419161676646707},
{'a': 0.6949648711943794,
'f1': 0.7540132200188858,
'p': 0.6240719030871434,
'r': 0.9522957662492546},
{'a': 0.4694069657985566,
'f1': 0.4663931839697065,
'p': 0.3124735729386892,
'r': 0.9191542288557214},
{'a': 0.789193302891933,
'f1': 0.7656514382402708,
'p': 0.6830188679245283,
'r': 0.8710298363811357},
{'a': 0.3921901528013582,
'f1': 0.32634408602150533,
'p': 0.19927774130006565,
'r': 0.900593471810089},
{'a': 0.7479967948717948,
'f1': 0.6831234256926951,
'p': 0.5809768637532133,
'r': 0.8288508557457213}],
'entertainment.msn.com;2015': [{'a': 0.5922783603431839,
'f1': 0.30057236304170076,
'p': 0.18274010737721216,
'r': 0.8462246777163904},
{'a': 0.5627964528768818,
'f1': 0.08620689655172414,
'p': 0.045599635202918376,
'r': 0.7874015748031497},
{'a': 0.5792236086353734,
'f1': 0.20978240654640132,
'p': 0.12028150991682661,
'r': 0.8197674418604651},
{'a': 0.5620437956204379,
'f1': 0.08229211546747094,
'p': 0.0435278030993619,
'r': 0.7519685039370079},
{'a': 0.5617125883437468,
'f1': 0.08782775527606056,
'p': 0.046627433227704844,
'r': 0.7545787545787546},
{'a': 0.5577628361858191,
'f1': 0.08821676118462508,
'p': 0.04678102027177545,
'r': 0.7720588235294118},
{'a': 0.49898887765419614,
'f1': 0.17462520821765687,
'p': 0.09741365959423881,
'r': 0.8420348058902276},
{'a': 0.5571502323180175,
'f1': 0.06700021753317381,
'p': 0.03503184713375796,
'r': 0.7661691542288557},
{'a': 0.5541204819277108,
'f1': 0.11345343043311613,
'p': 0.060980634528224144,
'r': 0.8131868131868132},
{'a': 0.5564376590330788,
'f1': 0.07825719120135363,
'p': 0.04120267260579064,
'r': 0.7773109243697479},
{'a': 0.4916342588405535,
'f1': 0.0852725793327909,
'p': 0.04492455418381344,
'r': 0.8370607028753994},
{'a': 0.5661891699685055,
'f1': 0.12139917695473253,
'p': 0.06573083778966132,
'r': 0.793010752688172},
{'a': 0.5645177312009536,
'f1': 0.16622289844047167,
'p': 0.09192259150189314,
'r': 0.8670634920634921},
{'a': 0.5425956576769545,
'f1': 0.11157337367624812,
'p': 0.059995932479153954,
'r': 0.7951482479784366},
{'a': 0.5201313937118723,
'f1': 0.0501579045142114,
'p': 0.025921658986175114,
'r': 0.7714285714285715},
{'a': 0.5346083788706739,
'f1': 0.19119974675530232,
'p': 0.10803076372741907,
'r': 0.8308115543328748},
{'a': 0.5627691984452148,
'f1': 0.05063868613138687,
'p': 0.02616690240452617,
'r': 0.7816901408450704},
{'a': 0.5659126365054602,
'f1': 0.159682899207248,
'p': 0.08790523690773068,
'r': 0.8703703703703703},
{'a': 0.5280909612077203,
'f1': 0.09525554130793186,
'p': 0.05061319836480436,
'r': 0.8074534161490683},
{'a': 0.5116564417177915,
'f1': 0.16935002981514607,
'p': 0.09424257507881201,
'r': 0.8340675477239354},
{'a': 0.5793736501079914,
'f1': 0.3330479452054795,
'p': 0.20717202201313686,
'r': 0.8487272727272728},
{'a': 0.5667684090041969,
'f1': 0.1768756795940558,
'p': 0.09878542510121457,
'r': 0.8442906574394463},
{'a': 0.5547138908793121,
'f1': 0.05884898312418867,
'p': 0.030665163472378805,
'r': 0.7272727272727273},
{'a': 0.5545847039473685,
'f1': 0.05824820691154097,
'p': 0.030330466274332276,
'r': 0.73224043715847},
{'a': 0.5695238095238095,
'f1': 0.1908342284282134,
'p': 0.10769852495453627,
'r': 0.8367346938775511}],
'news.bbc.co.uk;2000': [{'a': 0.9118311981914092,
'f1': 0.8494208494208494,
'p': 0.88,
'r': 0.8208955223880597},
{'a': 0.9057377049180327,
'f1': 0.8410138248847927,
'p': 0.8837772397094431,
'r': 0.8021978021978022},
{'a': 0.9044198895027624,
'f1': 0.9006318207926479,
'p': 0.9223529411764706,
'r': 0.8799102132435466},
{'a': 0.9266358228684732,
'f1': 0.9125295508274233,
'p': 0.9429967426710097,
'r': 0.8839694656488549},
{'a': 0.9117997616209773,
'f1': 0.9022457067371201,
'p': 0.918010752688172,
'r': 0.887012987012987},
{'a': 0.8712029161603888,
'f1': 0.825944170771757,
'p': 0.8152350081037277,
'r': 0.8369384359400999},
{'a': 0.9092284417549168,
'f1': 0.8529411764705883,
'p': 0.8969072164948454,
'r': 0.8130841121495327},
{'a': 0.9022482893450635,
'f1': 0.8511904761904762,
'p': 0.8746177370030581,
'r': 0.8289855072463768},
{'a': 0.8823114869626497,
'f1': 0.7930607187112763,
'p': 0.8142493638676844,
'r': 0.7729468599033816},
{'a': 0.9267782426778243,
'f1': 0.9076517150395779,
'p': 0.9502762430939227,
'r': 0.8686868686868687},
{'a': 0.9225251076040172,
'f1': 0.8689320388349514,
'p': 0.9132653061224489,
'r': 0.8287037037037037},
{'a': 0.8396866840731071,
'f1': 0.8188790560471977,
'p': 0.7797752808988764,
'r': 0.8621118012422361},
{'a': 0.9097978227060654,
'f1': 0.8284023668639052,
'p': 0.8588957055214724,
'r': 0.8},
{'a': 0.8773255813953489,
'f1': 0.8451944240645635,
'p': 0.8458149779735683,
'r': 0.844574780058651},
{'a': 0.9183238636363636,
'f1': 0.8762109795479011,
'p': 0.9187358916478555,
'r': 0.8374485596707819},
{'a': 0.9077791718946048,
'f1': 0.8746803069053709,
'p': 0.9210053859964094,
'r': 0.8327922077922078},
{'a': 0.9012187299550994,
'f1': 0.8555347091932457,
'p': 0.9101796407185628,
'r': 0.8070796460176991},
{'a': 0.8915866741953699,
'f1': 0.8748370273794003,
'p': 0.8946666666666667,
'r': 0.8558673469387755},
{'a': 0.9106292966684294,
'f1': 0.9038133181559476,
'p': 0.928654970760234,
'r': 0.8802660753880266},
{'a': 0.8931178310740354,
'f1': 0.8686739269698911,
'p': 0.9125168236877523,
'r': 0.8288508557457213},
{'a': 0.8817005545286506,
'f1': 0.8502340093603743,
'p': 0.8596214511041009,
'r': 0.8410493827160493},
{'a': 0.9145496535796767,
'f1': 0.8948863636363636,
'p': 0.9402985074626866,
'r': 0.8536585365853658},
{'a': 0.9274905422446406,
'f1': 0.9117421335379894,
'p': 0.9565217391304348,
'r': 0.8709677419354839},
{'a': 0.8952116585704372,
'f1': 0.8422152560083594,
'p': 0.8448637316561844,
'r': 0.8395833333333333},
{'a': 0.9093789607097592,
'f1': 0.8686868686868686,
'p': 0.9148936170212766,
'r': 0.8269230769230769}],
'news.bbc.co.uk;2005': [{'a': 0.8391019644527596,
'f1': 0.7152317880794702,
'p': 0.6189111747851003,
'r': 0.8470588235294118},
{'a': 0.8142076502732241,
'f1': 0.7186761229314422,
'p': 0.6166328600405679,
'r': 0.8611898016997167},
{'a': 0.7044967880085653,
'f1': 0.7008670520231215,
'p': 0.5652680652680653,
'r': 0.9220532319391636},
{'a': 0.8440366972477065,
'f1': 0.8042226487523992,
'p': 0.7325174825174825,
'r': 0.8914893617021277},
{'a': 0.8418952618453865,
'f1': 0.8483978957436633,
'p': 0.8063636363636364,
'r': 0.8950554994954592},
{'a': 0.8062460165710643,
'f1': 0.8033635187580855,
'p': 0.711340206185567,
'r': 0.9227340267459139},
{'a': 0.9006181645268664,
'f1': 0.914238818219122,
'p': 0.9034874290348743,
'r': 0.925249169435216},
{'a': 0.82328190743338,
'f1': 0.7700729927007299,
'p': 0.6963696369636964,
'r': 0.8612244897959184},
{'a': 0.8155339805825242,
'f1': 0.7031250000000001,
'p': 0.6094808126410836,
'r': 0.8307692307692308},
{'a': 0.836343732895457,
'f1': 0.8280621046578494,
'p': 0.7903402854006586,
'r': 0.8695652173913043},
{'a': 0.8426966292134831,
'f1': 0.8038528896672504,
'p': 0.765,
'r': 0.8468634686346863},
{'a': 0.8250950570342205,
'f1': 0.7415730337078651,
'p': 0.6626506024096386,
'r': 0.8418367346938775},
{'a': 0.8407202216066482,
'f1': 0.789762340036563,
'p': 0.7728085867620751,
'r': 0.8074766355140187},
{'a': 0.8260200153964589,
'f1': 0.755939524838013,
'p': 0.6809338521400778,
'r': 0.8495145631067961},
{'a': 0.7973811164713991,
'f1': 0.6931106471816284,
'p': 0.5981981981981982,
'r': 0.8238213399503722},
{'a': 0.8313349320543565,
'f1': 0.7365792759051186,
'p': 0.6526548672566371,
'r': 0.8452722063037249},
{'a': 0.7385358004827032,
'f1': 0.6277205040091639,
'p': 0.4823943661971831,
'r': 0.898360655737705},
{'a': 0.7896613190730838,
'f1': 0.7941860465116279,
'p': 0.6905965621840243,
'r': 0.9343365253077975},
{'a': 0.6781193490054249,
'f1': 0.5180505415162455,
'p': 0.38523489932885907,
'r': 0.790633608815427},
{'a': 0.794679005205321,
'f1': 0.8104644954618259,
'p': 0.711340206185567,
'r': 0.9416873449131513},
{'a': 0.8725854383358098,
'f1': 0.8961550105964273,
'p': 0.8716136631330977,
'r': 0.9221183800623053},
{'a': 0.7995110024449877,
'f1': 0.638235294117647,
'p': 0.5331695331695332,
'r': 0.7948717948717948},
{'a': 0.8473439917483239,
'f1': 0.8435517970401691,
'p': 0.8093306288032455,
'r': 0.8807947019867549},
{'a': 0.709572742022715,
'f1': 0.6546623794212219,
'p': 0.5146612740141557,
'r': 0.8992932862190812},
{'a': 0.8297376093294461,
'f1': 0.8027027027027026,
'p': 0.7342398022249691,
'r': 0.8852459016393442}],
'news.bbc.co.uk;2010': [{'a': 0.7625243981782693,
'f1': 0.7058823529411765,
'p': 0.6311239193083573,
'r': 0.8007312614259597},
{'a': 0.7582283624542869,
'f1': 0.5697758496023138,
'p': 0.4586728754365541,
'r': 0.7519083969465649},
{'a': 0.7143962848297214,
'f1': 0.45089285714285715,
'p': 0.33554817275747506,
'r': 0.6870748299319728},
{'a': 0.706855791962175,
'f1': 0.5267175572519084,
'p': 0.3942857142857143,
'r': 0.7931034482758621},
{'a': 0.7664473684210527,
'f1': 0.32746955345060896,
'p': 0.21530249110320285,
'r': 0.6836158192090396},
{'a': 0.7461730153079388,
'f1': 0.6067291781577496,
'p': 0.5018248175182481,
'r': 0.7670850767085077},
{'a': 0.6400894187779433,
'f1': 0.02424242424242424,
'p': 0.013729977116704805,
'r': 0.10344827586206896},
{'a': 0.7614255765199162,
'f1': 0.4878487848784878,
'p': 0.3702185792349727,
'r': 0.7150395778364116},
{'a': 0.7620689655172413,
'f1': 0.463035019455253,
'p': 0.3380681818181818,
'r': 0.7345679012345679},
{'a': 0.7661676646706587,
'f1': 0.7279693486590039,
'p': 0.6626506024096386,
'r': 0.8075734157650696},
{'a': 0.7575757575757576,
'f1': 0.456,
'p': 0.33727810650887574,
'r': 0.7037037037037037},
{'a': 0.7598484848484849,
'f1': 0.6313953488372093,
'p': 0.5386904761904762,
'r': 0.7626404494382022},
{'a': 0.754180602006689,
'f1': 0.48148148148148145,
'p': 0.3684210526315789,
'r': 0.6946564885496184},
{'a': 0.7579535683576956,
'f1': 0.49141824751580854,
'p': 0.37006802721088433,
'r': 0.7311827956989247},
{'a': 0.7263339070567987,
'f1': 0.29646017699115046,
'p': 0.18457300275482094,
'r': 0.7528089887640449},
{'a': 0.7547416612164813,
'f1': 0.6553308823529411,
'p': 0.5882838283828383,
'r': 0.7396265560165975},
{'a': 0.7636441770519983,
'f1': 0.5454545454545453,
'p': 0.4263565891472868,
'r': 0.7568807339449541},
{'a': 0.6528982992016661,
'f1': 0.4959677419354839,
'p': 0.3614988978692138,
'r': 0.7897271268057785},
{'a': 0.6826923076923077,
'f1': 0.5428424833247819,
'p': 0.41884402216943784,
'r': 0.7711370262390671},
{'a': 0.7665213015766521,
'f1': 0.7015437392795882,
'p': 0.621580547112462,
'r': 0.8051181102362205},
{'a': 0.7759719566602932,
'f1': 0.7356148928168484,
'p': 0.6694045174537988,
'r': 0.8163606010016694},
{'a': 0.7036881810561609,
'f1': 0.4532095901005414,
'p': 0.32447397563676633,
'r': 0.7512820512820513},
{'a': 0.7713230355943587,
'f1': 0.6939325842696629,
'p': 0.628152969894223,
'r': 0.7751004016064257},
{'a': 0.6798048048048048,
'f1': 0.5066512434933488,
'p': 0.3721325403568394,
'r': 0.7934782608695652},
{'a': 0.6916354556803995,
'f1': 0.43650190114068443,
'p': 0.30434782608695654,
'r': 0.771505376344086}],
'news.bbc.co.uk;2015': [{'a': 0.5555871077665767,
'f1': 0.23658536585365852,
'p': 0.14049826187717265,
'r': 0.7484567901234568},
{'a': 0.5497448979591837,
'f1': 0.20236003012804415,
'p': 0.11711711711711711,
'r': 0.7435424354243543},
{'a': 0.563236936825121,
'f1': 0.3763955342902711,
'p': 0.24402573529411764,
'r': 0.8226181254841208},
{'a': 0.5506756756756757,
'f1': 0.14048890137679126,
'p': 0.07836990595611286,
'r': 0.6775067750677507},
{'a': 0.5788987191337647,
'f1': 0.36838978015448604,
'p': 0.23779084633086167,
'r': 0.8172231985940246},
{'a': 0.5673118905545045,
'f1': 0.1927776269345642,
'p': 0.11216429699842022,
'r': 0.6853281853281853},
{'a': 0.5444834855938159,
'f1': 0.19995062947420392,
'p': 0.11571428571428571,
'r': 0.73502722323049},
{'a': 0.5694312474548663,
'f1': 0.2543488481429243,
'p': 0.15510321100917432,
'r': 0.706266318537859},
{'a': 0.5528025381477565,
'f1': 0.11217756448710257,
'p': 0.061131088591042826,
'r': 0.68},
{'a': 0.5627456909585727,
'f1': 0.1770062606715993,
'p': 0.10103963612735542,
'r': 0.713302752293578},
{'a': 0.48321324543921507,
'f1': 0.06335093081411503,
'p': 0.03328467153284671,
'r': 0.6551724137931034},
{'a': 0.5497154836777478,
'f1': 0.08183206106870229,
'p': 0.043762246897452645,
'r': 0.6291079812206573},
{'a': 0.558078141499472,
'f1': 0.2945638432364096,
'p': 0.1813700051894136,
'r': 0.7836322869955157},
{'a': 0.5500292568753657,
'f1': 0.15864332603938733,
'p': 0.08876645240281604,
'r': 0.7455012853470437},
{'a': 0.5471483430521974,
'f1': 0.1939240506329114,
'p': 0.1114019778941245,
'r': 0.748046875},
{'a': 0.5554911619820342,
'f1': 0.1520176893311222,
'p': 0.08550995024875623,
'r': 0.6840796019900498},
{'a': 0.5763792625450513,
'f1': 0.26503126503126506,
'p': 0.1626808385001476,
'r': 0.7146562905317769},
{'a': 0.5585113353426812,
'f1': 0.31202777210537064,
'p': 0.19435258204019334,
'r': 0.7908902691511387},
{'a': 0.5703114281794485,
'f1': 0.30182790905037893,
'p': 0.18701657458563536,
'r': 0.7817551963048499},
{'a': 0.5642479213907785,
'f1': 0.25210810810810813,
'p': 0.1531791907514451,
'r': 0.7118437118437119},
{'a': 0.5640256959314776,
'f1': 0.23726273726273728,
'p': 0.14061574896388396,
'r': 0.7587859424920128},
{'a': 0.5690190257725003,
'f1': 0.2325804901489668,
'p': 0.1399652978600347,
'r': 0.6875},
{'a': 0.5737860137968348,
'f1': 0.3251231527093596,
'p': 0.2042518837459634,
'r': 0.7964323189926548},
{'a': 0.5553221288515406,
'f1': 0.24530544330877105,
'p': 0.1462999716472923,
'r': 0.7588235294117647},
{'a': 0.5427863292460214,
'f1': 0.1616838077015068,
'p': 0.09164859002169197,
'r': 0.6855983772819473}],
'news.yahoo.com;2000': [{'a': 0.9225543478260869,
'f1': 0.9279393173198484,
'p': 0.9607329842931938,
'r': 0.8973105134474327},
{'a': 0.8682457438934122,
'f1': 0.8540983606557376,
'p': 0.837620578778135,
'r': 0.8712374581939799},
{'a': 0.9199632014719411,
'f1': 0.9085173501577286,
'p': 0.9230769230769231,
'r': 0.8944099378881988},
{'a': 0.8974093264248705,
'f1': 0.8626907073509015,
'p': 0.8405405405405405,
'r': 0.886039886039886},
{'a': 0.9158878504672897,
'f1': 0.8979591836734695,
'p': 0.9145496535796767,
'r': 0.8819599109131403},
{'a': 0.9133271202236719,
'f1': 0.8963210702341137,
'p': 0.919908466819222,
'r': 0.8739130434782608},
{'a': 0.8031042128603104,
'f1': 0.8314350797266515,
'p': 0.909468438538206,
'r': 0.7657342657342657},
{'a': 0.8618903754855416,
'f1': 0.8683127572016461,
'p': 0.8591205211726385,
'r': 0.8777038269550749},
{'a': 0.8721804511278195,
'f1': 0.859338061465721,
'p': 0.8644470868014269,
'r': 0.854289071680376},
{'a': 0.877562028047465,
'f1': 0.8540192926045016,
'p': 0.8634590377113134,
'r': 0.8447837150127226},
{'a': 0.8776978417266187,
'f1': 0.8478747203579418,
'p': 0.838495575221239,
'r': 0.8574660633484162},
{'a': 0.920123839009288,
'f1': 0.9315649867374005,
'p': 0.9430719656283566,
'r': 0.9203354297693921},
{'a': 0.8999055712936733,
'f1': 0.8582887700534759,
'p': 0.856,
'r': 0.8605898123324397},
{'a': 0.9074235807860263,
'f1': 0.8960784313725491,
'p': 0.8943248532289628,
'r': 0.8978388998035364},
{'a': 0.8967314069161535,
'f1': 0.9213564213564215,
'p': 0.8948843728100911,
'r': 0.9494423791821561},
{'a': 0.8570649208947081,
'f1': 0.844418052256532,
'p': 0.8535414165666266,
'r': 0.8354876615746181},
{'a': 0.9123867069486404,
'f1': 0.8642745709828393,
'p': 0.9111842105263158,
'r': 0.8219584569732937},
{'a': 0.8925714285714286,
'f1': 0.8164062500000001,
'p': 0.8038461538461539,
'r': 0.8293650793650794},
{'a': 0.9205298013245033,
'f1': 0.9025522041763342,
'p': 0.9131455399061033,
'r': 0.8922018348623854},
{'a': 0.8363959691760522,
'f1': 0.7730263157894737,
'p': 0.7617504051863857,
'r': 0.7846410684474123},
{'a': 0.8599656357388317,
'f1': 0.6433260393873085,
'p': 0.6099585062240664,
'r': 0.6805555555555556},
{'a': 0.90089358245329,
'f1': 0.8842504743833016,
'p': 0.8944337811900192,
'r': 0.874296435272045},
{'a': 0.8973544973544973,
'f1': 0.8422764227642278,
'p': 0.8248407643312102,
'r': 0.8604651162790697},
{'a': 0.9201773835920177,
'f1': 0.924791086350975,
'p': 0.924791086350975,
'r': 0.924791086350975},
{'a': 0.9150214592274678,
'f1': 0.9247148288973385,
'p': 0.8928046989720999,
'r': 0.9589905362776026}],
'news.yahoo.com;2005': [{'a': 0.826288899210404,
'f1': 0.8064182194616977,
'p': 0.7369914853358562,
'r': 0.8902857142857142},
{'a': 0.7051349920592906,
'f1': 0.6634441087613293,
'p': 0.5294117647058824,
'r': 0.8883495145631068},
{'a': 0.8285714285714286,
'f1': 0.827937095282146,
'p': 0.766923736075407,
'r': 0.8994974874371859},
{'a': 0.7835408022130014,
'f1': 0.7016205910390848,
'p': 0.5832012678288431,
'r': 0.8803827751196173},
{'a': 0.7626582278481012,
'f1': 0.5689655172413792,
'p': 0.42950108459869846,
'r': 0.8425531914893617},
{'a': 0.8167247386759582,
'f1': 0.7674624226348364,
'p': 0.6625954198473283,
'r': 0.9117647058823529},
{'a': 0.6875776397515528,
'f1': 0.5755274261603376,
'p': 0.4289308176100629,
'r': 0.8743589743589744},
{'a': 0.7848872638634978,
'f1': 0.7252918287937743,
'p': 0.6044098573281452,
'r': 0.9066147859922179},
{'a': 0.915273132664437,
'f1': 0.8756137479541735,
'p': 0.8784893267651889,
'r': 0.8727569331158238},
{'a': 0.7367066895368782,
'f1': 0.7725925925925926,
'p': 0.6530995616781465,
'r': 0.9456029011786038},
{'a': 0.7989203778677463,
'f1': 0.6179487179487179,
'p': 0.5205183585313174,
'r': 0.7602523659305994},
{'a': 0.6863844977052524,
'f1': 0.7047527604416708,
'p': 0.5707620528771384,
'r': 0.9209535759096612},
{'a': 0.9134049186006234,
'f1': 0.9062265566391597,
'p': 0.94375,
'r': 0.8715728715728716},
{'a': 0.8586535072259429,
'f1': 0.8875175315568024,
'p': 0.8401486988847584,
'r': 0.9405469678953626},
{'a': 0.8676420551207894,
'f1': 0.8979805927091529,
'p': 0.852165256346441,
'r': 0.9490022172949002},
{'a': 0.7374233128834355,
'f1': 0.7720170454545455,
'p': 0.6516786570743405,
'r': 0.9468641114982579},
{'a': 0.7749169435215947,
'f1': 0.47984644913627633,
'p': 0.36231884057971014,
'r': 0.7102272727272727},
{'a': 0.7755662319835278,
'f1': 0.6765578635014837,
'p': 0.5652892561983471,
'r': 0.8423645320197044},
{'a': 0.8507462686567164,
'f1': 0.8557692307692308,
'p': 0.7837573385518591,
'r': 0.9423529411764706},
{'a': 0.8466257668711656,
'f1': 0.8619957537154989,
'p': 0.79296875,
'r': 0.9441860465116279},
{'a': 0.6668734491315137,
'f1': 0.5095890410958904,
'p': 0.3661417322834646,
'r': 0.8378378378378378},
{'a': 0.8207322872087494,
'f1': 0.7851851851851852,
'p': 0.7298728813559322,
'r': 0.8495684340320592},
{'a': 0.8586556169429097,
'f1': 0.8662309368191722,
'p': 0.8174342105263158,
'r': 0.9212233549582948},
{'a': 0.6675409836065573,
'f1': 0.5053658536585366,
'p': 0.36022253129346316,
'r': 0.8464052287581699},
{'a': 0.8410117434507678,
'f1': 0.8462882096069869,
'p': 0.7795655671761866,
'r': 0.9255014326647565}],
'news.yahoo.com;2010': [{'a': 0.8125,
'f1': 0.7473982970671712,
'p': 0.6954225352112676,
'r': 0.8077709611451943},
{'a': 0.792352371732817,
'f1': 0.49230769230769234,
'p': 0.37749546279491836,
'r': 0.7074829931972789},
{'a': 0.7949526813880127,
'f1': 0.6627756160830091,
'p': 0.5774011299435028,
'r': 0.7777777777777778},
{'a': 0.7983315197678637,
'f1': 0.7148717948717948,
'p': 0.6606635071090048,
'r': 0.7787709497206704},
{'a': 0.8179453836150845,
'f1': 0.7824397824397824,
'p': 0.7437223042836041,
'r': 0.8254098360655737},
{'a': 0.801693404634581,
'f1': 0.6120313862249346,
'p': 0.5043103448275862,
'r': 0.7782705099778271},
{'a': 0.8084656084656084,
'f1': 0.7454289732770745,
'p': 0.6824034334763949,
'r': 0.8212809917355371},
{'a': 0.7891472868217054,
'f1': 0.6472114137483787,
'p': 0.558165548098434,
'r': 0.7700617283950617},
{'a': 0.8128453038674033,
'f1': 0.7665805340223945,
'p': 0.7212317666126418,
'r': 0.8180147058823529},
{'a': 0.7882805816937554,
'f1': 0.5607808340727595,
'p': 0.4520743919885551,
'r': 0.7383177570093458},
{'a': 0.7756706753006476,
'f1': 0.41495778045838355,
'p': 0.29965156794425085,
'r': 0.6745098039215687},
{'a': 0.7818003913894325,
'f1': 0.4677804295942721,
'p': 0.3391003460207612,
'r': 0.7538461538461538},
{'a': 0.8298582151793161,
'f1': 0.8300943920044419,
'p': 0.806799784133837,
'r': 0.8547741566609491},
{'a': 0.7997992975413949,
'f1': 0.4017991004497751,
'p': 0.2809224318658281,
'r': 0.7052631578947368},
{'a': 0.8048540505083634,
'f1': 0.7550432276657061,
'p': 0.7005347593582888,
'r': 0.81875},
{'a': 0.8002373417721519,
'f1': 0.6918852959121414,
'p': 0.6203501094091903,
'r': 0.7820689655172414},
{'a': 0.7970494417862839,
'f1': 0.6688353936239428,
'p': 0.5881006864988558,
'r': 0.77526395173454},
{'a': 0.7986111111111112,
'f1': 0.6393562545720556,
'p': 0.5588235294117647,
'r': 0.747008547008547},
{'a': 0.799609375,
'f1': 0.673871582962492,
'p': 0.6057142857142858,
'r': 0.7593123209169055},
{'a': 0.7792207792207793,
'f1': 0.6678507992895204,
'p': 0.5784615384615385,
'r': 0.7899159663865546},
{'a': 0.8131220051603391,
'f1': 0.7050610820244329,
'p': 0.637223974763407,
'r': 0.7890625},
{'a': 0.7875927174645988,
'f1': 0.7403132728771641,
'p': 0.6834094368340944,
'r': 0.8075539568345323},
{'a': 0.8000719165767709,
'f1': 0.728780487804878,
'p': 0.6772438803263826,
'r': 0.7888067581837381},
{'a': 0.8107739515854074,
'f1': 0.754750331418471,
'p': 0.7,
'r': 0.8187919463087249},
{'a': 0.8030973451327433,
'f1': 0.5250800426894343,
'p': 0.41765704584040747,
'r': 0.7068965517241379}],
'news.yahoo.com;2015': [{'a': 0.3048423700544117,
'f1': 0.035215543412264724,
'p': 0.018075574600701208,
'r': 0.6803519061583577},
{'a': 0.34525586353944565,
'f1': 0.025003968883949835,
'p': 0.012745296378717378,
'r': 0.6548856548856549},
{'a': 0.2560697667057073,
'f1': 0.01878796735068785,
'p': 0.009526228883525974,
'r': 0.6766917293233082},
{'a': 0.3435495898583147,
'f1': 0.04774095842498107,
'p': 0.024713480419606526,
'r': 0.6997885835095138},
{'a': 0.38345512460183623,
'f1': 0.02623921085080148,
'p': 0.013366162504396765,
'r': 0.7112299465240641},
{'a': 0.3304576046566016,
'f1': 0.06543344214726152,
'p': 0.03422760217053087,
'r': 0.7411668036154478},
{'a': 0.3483621870718645,
'f1': 0.052654450640979206,
'p': 0.027355508729680914,
'r': 0.7003853564547207},
{'a': 0.3494353008685673,
'f1': 0.056013927787449846,
'p': 0.029069767441860465,
'r': 0.7660455486542443},
{'a': 0.2613521237506237,
'f1': 0.03416572750459695,
'p': 0.017480678185570347,
'r': 0.7506516072980017},
{'a': 0.38774996063612027,
'f1': 0.028579994004197064,
'p': 0.014579934747145187,
'r': 0.7185929648241206},
{'a': 0.2916898903840539,
'f1': 0.008692099104788083,
'p': 0.004374976629398347,
'r': 0.6573033707865169},
{'a': 0.34022892717958775,
'f1': 0.036302448804238864,
'p': 0.01866166077738516,
'r': 0.6636125654450262},
{'a': 0.3620855236554792,
'f1': 0.04883747220861439,
'p': 0.02526020348497252,
'r': 0.7330316742081447},
{'a': 0.33174694993689524,
'f1': 0.04314596588983848,
'p': 0.022292250233426705,
'r': 0.6686114352392065},
{'a': 0.24957875777119284,
'f1': 0.029311187103077677,
'p': 0.014970059880239521,
'r': 0.6976744186046512},
{'a': 0.3363527076518773,
'f1': 0.022886309376800855,
'p': 0.011643979057591623,
'r': 0.6634844868735084},
{'a': 0.3411867364746946,
'f1': 0.03392680875955105,
'p': 0.01741349545898071,
'r': 0.6562942008486563},
{'a': 0.2571102978941962,
'f1': 0.013344418153524759,
'p': 0.00674612582710089,
'r': 0.6089494163424124},
{'a': 0.3310363836824697,
'f1': 0.03394246426632894,
'p': 0.017437537180249853,
'r': 0.6346414073071719},
{'a': 0.34860527514807876,
'f1': 0.04199240562876927,
'p': 0.02163157289149695,
'r': 0.714828897338403},
{'a': 0.33099696356275304,
'f1': 0.01856045139017781,
'p': 0.009425426029256523,
'r': 0.6024096385542169},
{'a': 0.2689490523443717,
'f1': 0.024784973026843165,
'p': 0.012613009922822492,
'r': 0.7087980173482032},
{'a': 0.34358827597720065,
'f1': 0.07051195461299474,
'p': 0.03700552956188856,
'r': 0.7457142857142857},
{'a': 0.34750822755054067,
'f1': 0.06406581919951444,
'p': 0.03352152434721242,
'r': 0.721336370539104},
{'a': 0.25777743020254945,
'f1': 0.010921672433198549,
'p': 0.005512423993772223,
'r': 0.5835189309576837}],
'thenation.com;2000': [{'a': 0.948016415868673,
'f1': 0.963035019455253,
'p': 0.9611650485436893,
'r': 0.9649122807017544},
{'a': 0.900839054157132,
'f1': 0.9214975845410629,
'p': 0.8965922444183314,
'r': 0.9478260869565217},
{'a': 0.8160337552742616,
'f1': 0.844950213371266,
'p': 0.75,
'r': 0.9674267100977199},
{'a': 0.9159420289855073,
'f1': 0.9504950495049507,
'p': 0.9200264375413086,
'r': 0.9830508474576272},
{'a': 0.7521212121212121,
'f1': 0.8069844266163284,
'p': 0.6939935064935064,
'r': 0.963923337091319},
{'a': 0.8325673013788575,
'f1': 0.8754274548119199,
'p': 0.7950310559006211,
'r': 0.9739130434782609},
{'a': 0.9291457286432161,
'f1': 0.9593190998268898,
'p': 0.9308510638297872,
'r': 0.9895833333333334},
{'a': 0.9315551082033215,
'f1': 0.9603960396039604,
'p': 0.9371980676328503,
'r': 0.9847715736040609},
{'a': 0.8867091711623345,
'f1': 0.925459825750242,
'p': 0.8749237339841367,
'r': 0.9821917808219178},
{'a': 0.9161966156325544,
'f1': 0.9312169312169312,
'p': 0.9130998702983139,
'r': 0.9500674763832658},
{'a': 0.9013710747456878,
'f1': 0.9372007885102787,
'p': 0.8946236559139785,
'r': 0.984033116499113},
{'a': 0.7989382879893829,
'f1': 0.8438948995363215,
'p': 0.7465815861440291,
'r': 0.9703791469194313},
{'a': 0.92643391521197,
'f1': 0.8747346072186836,
'p': 0.8841201716738197,
'r': 0.865546218487395},
{'a': 0.9326113116726835,
'f1': 0.8828451882845187,
'p': 0.8865546218487395,
'r': 0.8791666666666667},
{'a': 0.929305912596401,
'f1': 0.9499089253187614,
'p': 0.924645390070922,
'r': 0.9765917602996255},
{'a': 0.8337819650067295,
'f1': 0.8753154972236243,
'p': 0.7903372835004557,
'r': 0.9807692307692307},
{'a': 0.9232209737827716,
'f1': 0.9459815546772069,
'p': 0.9220890410958904,
'r': 0.9711451758340848},
{'a': 0.9671549045716822,
'f1': 0.9795353982300885,
'p': 0.9838888888888889,
'r': 0.9752202643171806},
{'a': 0.8517538054268696,
'f1': 0.8909444985394352,
'p': 0.8198924731182796,
'r': 0.9754797441364605},
{'a': 0.9136400322841001,
'f1': 0.9288090485695276,
'p': 0.9148099606815203,
'r': 0.9432432432432433},
{'a': 0.949293246578416,
'f1': 0.9712248535777948,
'p': 0.9556502129792032,
'r': 0.9873155578565881},
{'a': 0.9605055292259084,
'f1': 0.9787835926449787,
'p': 0.9651324965132496,
'r': 0.9928263988522238},
{'a': 0.9318840579710145,
'f1': 0.9566020313942751,
'p': 0.9316546762589928,
'r': 0.9829222011385199},
{'a': 0.847394540942928,
'f1': 0.8894878706199462,
'p': 0.8256880733944955,
'r': 0.9639727361246349},
{'a': 0.8889570552147239,
'f1': 0.9224174882126017,
'p': 0.8762214983713354,
'r': 0.9737556561085973}],
'thenation.com;2005': [{'a': 0.7414854329093147,
'f1': 0.8374613003095975,
'p': 0.7300944669365722,
'r': 0.9818511796733213},
{'a': 0.5137777777777778,
'f1': 0.6188153310104529,
'p': 0.4563206577595067,
'r': 0.961038961038961},
{'a': 0.7658473479948253,
'f1': 0.852725793327909,
'p': 0.7561327561327561,
'r': 0.9776119402985075},
{'a': 0.849610270518111,
'f1': 0.9101861993428259,
'p': 0.8483920367534457,
'r': 0.9816893089190786},
{'a': 0.5731292517006803,
'f1': 0.6714659685863875,
'p': 0.5202839756592292,
'r': 0.9464944649446494},
{'a': 0.9057798891528107,
'f1': 0.9227774172615184,
'p': 0.8876404494382022,
'r': 0.9608108108108108},
{'a': 0.7099871959026889,
'f1': 0.8026143790849674,
'p': 0.6842496285289748,
'r': 0.9704952581664911},
{'a': 0.8706038487060385,
'f1': 0.8898927159796725,
'p': 0.8668866886688669,
'r': 0.91415313225058},
{'a': 0.7269180754226268,
'f1': 0.8161120840630474,
'p': 0.7039274924471299,
'r': 0.9708333333333333},
{'a': 0.9004950495049505,
'f1': 0.9133247089262614,
'p': 0.9168831168831169,
'r': 0.9097938144329897},
{'a': 0.826677994902294,
'f1': 0.8794326241134752,
'p': 0.8275862068965517,
'r': 0.9382093316519546},
{'a': 0.6467889908256881,
'f1': 0.7636224098234843,
'p': 0.6269691241335854,
'r': 0.9764474975466143},
{'a': 0.8954685890834192,
'f1': 0.913946587537092,
'p': 0.9120135363790186,
'r': 0.9158878504672897},
{'a': 0.7067342505430847,
'f1': 0.8055688910225637,
'p': 0.6894001643385373,
'r': 0.9688221709006929},
{'a': 0.7941558441558442,
'f1': 0.8731492597038816,
'p': 0.7905797101449276,
'r': 0.9749776586237712},
{'a': 0.9431714023831348,
'f1': 0.9671610169491526,
'p': 0.9620653319283456,
'r': 0.972310969116081},
{'a': 0.8959881129271917,
'f1': 0.9042407660738714,
'p': 0.8789893617021277,
'r': 0.9309859154929577},
{'a': 0.9060481503229595,
'f1': 0.9272727272727272,
'p': 0.9082813891362422,
'r': 0.947075208913649},
{'a': 0.8342046303211351,
'f1': 0.8497970230040597,
'p': 0.7733990147783252,
'r': 0.9429429429429429},
{'a': 0.8912901113294041,
'f1': 0.9011904761904763,
'p': 0.8822843822843823,
'r': 0.9209245742092458},
{'a': 0.842873831775701,
'f1': 0.8663686040735221,
'p': 0.8126747437092264,
'r': 0.9276595744680851},
{'a': 0.905373831775701,
'f1': 0.9209756097560975,
'p': 0.9129593810444874,
'r': 0.9291338582677166},
{'a': 0.865615141955836,
'f1': 0.8735905044510386,
'p': 0.8382687927107062,
'r': 0.9120198265179678},
{'a': 0.8798283261802575,
'f1': 0.89937106918239,
'p': 0.8674176776429809,
'r': 0.933768656716418},
{'a': 0.7283018867924528,
'f1': 0.8226600985221675,
'p': 0.712457337883959,
'r': 0.9731934731934732}],
'thenation.com;2010': [{'a': 0.675764192139738,
'f1': 0.6285178236397749,
'p': 0.489766081871345,
'r': 0.8769633507853403},
{'a': 0.6671180931744312,
'f1': 0.6212634822804315,
'p': 0.4818355640535373,
'r': 0.8742411101474414},
{'a': 0.6322725012431626,
'f1': 0.586756077116513,
'p': 0.44043624161073824,
'r': 0.8786610878661087},
{'a': 0.7086073777523592,
'f1': 0.647282796815507,
'p': 0.5114879649890591,
'r': 0.88124410933082},
{'a': 0.7182883341823739,
'f1': 0.7089473684210525,
'p': 0.5836221837088388,
'r': 0.9028150134048257},
{'a': 0.8147023086269745,
'f1': 0.45045045045045046,
'p': 0.3246753246753247,
'r': 0.7352941176470589},
{'a': 0.657844387755102,
'f1': 0.49695264885138307,
'p': 0.3559435862995299,
'r': 0.8229813664596274},
{'a': 0.4285228624851266,
'f1': 0.40663607483233327,
'p': 0.2612244897959184,
'r': 0.9171974522292994},
{'a': 0.8819702602230484,
'f1': 0.9008973858759267,
'p': 0.8726379440665155,
'r': 0.9310483870967742},
{'a': 0.8352638352638353,
'f1': 0.787551867219917,
'p': 0.7684210526315789,
'r': 0.8076595744680851},
{'a': 0.7230172927847347,
'f1': 0.6279535442531037,
'p': 0.5275908479138627,
'r': 0.7754698318496538},
{'a': 0.6940684223480187,
'f1': 0.6834733893557422,
'p': 0.5502255022550225,
'r': 0.9018817204301075},
{'a': 0.6265653869841922,
'f1': 0.6345187864175206,
'p': 0.48584615384615387,
'r': 0.9143022582513028},
{'a': 0.6097623966942148,
'f1': 0.5624094989863887,
'p': 0.4090143218197136,
'r': 0.8999073215940686},
{'a': 0.7384384384384385,
'f1': 0.6553225168183617,
'p': 0.553475935828877,
'r': 0.8031037827352085},
{'a': 0.5977851083883129,
'f1': 0.5124250214224507,
'p': 0.3676229508196721,
'r': 0.8454288407163054},
{'a': 0.7416363034117257,
'f1': 0.6842105263157894,
'p': 0.5588624338624338,
'r': 0.8820459290187892},
{'a': 0.7345368452204795,
'f1': 0.712592117910926,
'p': 0.5868073878627968,
'r': 0.9070146818923328},
{'a': 0.36462324393358875,
'f1': 0.40454817474566124,
'p': 0.2586404795306721,
'r': 0.9281464530892448},
{'a': 0.5604063701263042,
'f1': 0.4226469527587451,
'p': 0.28309178743961355,
'r': 0.833570412517781},
{'a': 0.5278008298755187,
'f1': 0.4557627929220469,
'p': 0.31123448726322667,
'r': 0.8508928571428571},
{'a': 0.8262844166903207,
'f1': 0.80875,
'p': 0.7398513436249285,
'r': 0.8917987594762233},
{'a': 0.5724090597117364,
'f1': 0.5379480840543882,
'p': 0.3837742504409171,
'r': 0.8991735537190083},
{'a': 0.7998363785110445,
'f1': 0.78475073313783,
'p': 0.6904024767801857,
'r': 0.9089673913043478},
{'a': 0.7134107027724049,
'f1': 0.6189455636519503,
'p': 0.4935064935064935,
'r': 0.8298850574712644}],
'thenation.com;2015': [{'a': 0.7011661807580175,
'f1': 0.7466007416563658,
'p': 0.631578947368421,
'r': 0.9128463476070529},
{'a': 0.6158984635938544,
'f1': 0.5607333842627961,
'p': 0.42305475504322765,
'r': 0.8312570781426953},
{'a': 0.6486733760292772,
'f1': 0.6437847866419295,
'p': 0.5090464547677261,
'r': 0.8755256518082423},
{'a': 0.7768453502312039,
'f1': 0.83955177933752,
'p': 0.7604282846308276,
'r': 0.9370533260032985},
{'a': 0.6675358539765319,
'f1': 0.6966452533904354,
'p': 0.567222006974041,
'r': 0.9025893958076449},
{'a': 0.6472923164162178,
'f1': 0.6589912280701754,
'p': 0.5306843267108168,
'r': 0.8691250903832248},
{'a': 0.6458094144661309,
'f1': 0.6523943661971832,
'p': 0.5220919747520288,
'r': 0.8693693693693694},
{'a': 0.543138866064092,
'f1': 0.3110285006195787,
'p': 0.19670846394984326,
'r': 0.742603550295858},
{'a': 0.6071055381400209,
'f1': 0.5534441805225654,
'p': 0.40853302162478083,
'r': 0.8576687116564418},
{'a': 0.6504384638645297,
'f1': 0.6545128511655709,
'p': 0.5179754020813624,
'r': 0.8887987012987013},
{'a': 0.5240253853127833,
'f1': 0.14634146341463414,
'p': 0.08272058823529412,
'r': 0.6338028169014085},
{'a': 0.6274137385248496,
'f1': 0.5923103567717354,
'p': 0.45943041375604515,
'r': 0.8333333333333334},
{'a': 0.650899593731863,
'f1': 0.6559908492993995,
'p': 0.5256645279560037,
'r': 0.8722433460076046},
{'a': 0.6512681159420289,
'f1': 0.6526315789473685,
'p': 0.514218009478673,
'r': 0.8930041152263375},
{'a': 0.6519756838905775,
'f1': 0.6560528687293481,
'p': 0.5182724252491694,
'r': 0.8936170212765957},
{'a': 0.5417523652817771,
'f1': 0.2967171717171717,
'p': 0.18905872888173772,
'r': 0.6891495601173021},
{'a': 0.7307525010874293,
'f1': 0.7867723045125732,
'p': 0.6813842482100239,
'r': 0.9307253463732681},
{'a': 0.7786984031334739,
'f1': 0.8467716699697507,
'p': 0.7636876763875823,
'r': 0.9501404494382022},
{'a': 0.7297186280550421,
'f1': 0.7874677002583979,
'p': 0.6924169270093723,
'r': 0.9127667540247099},
{'a': 0.7475834397227795,
'f1': 0.8104109589041095,
'p': 0.7160493827160493,
'r': 0.9334174818554749},
{'a': 0.6403210867551713,
'f1': 0.6274384393987849,
'p': 0.49520444220090865,
'r': 0.856020942408377},
{'a': 0.6655328798185941,
'f1': 0.6894736842105262,
'p': 0.561990561990562,
'r': 0.8917631041524847},
{'a': 0.7591199699135013,
'f1': 0.8191444303261329,
'p': 0.7310987903225806,
'r': 0.9313001605136436},
{'a': 0.5442651548190144,
'f1': 0.23218221895664953,
'p': 0.14044444444444446,
'r': 0.6694915254237288},
{'a': 0.612482853223594,
'f1': 0.5592823712948518,
'p': 0.41589327146171695,
'r': 0.8535714285714285}],
'www.cnn.com;2000': [{'a': 0.8130899937067338,
'f1': 0.7341092211280216,
'p': 0.68561872909699,
'r': 0.789980732177264},
{'a': 0.7033918691363964,
'f1': 0.7619231511874879,
'p': 0.6567909454061251,
'r': 0.9071264367816092},
{'a': 0.9567706842255941,
'f1': 0.9742628259757967,
'p': 0.9855172413793103,
'r': 0.963262554769127},
{'a': 0.7296494355317885,
'f1': 0.6033129904097646,
'p': 0.4798890429958391,
'r': 0.812206572769953},
{'a': 0.8298865910607072,
'f1': 0.7038327526132403,
'p': 0.62217659137577,
'r': 0.8101604278074866},
{'a': 0.8375254928619986,
'f1': 0.8686813186813187,
'p': 0.8187467633350596,
'r': 0.92510239906378},
{'a': 0.7469262295081968,
'f1': 0.77255985267035,
'p': 0.6549570647931303,
'r': 0.941638608305275},
{'a': 0.812,
'f1': 0.8061056105610561,
'p': 0.8196308724832215,
'r': 0.7930194805194806},
{'a': 0.7922141119221411,
'f1': 0.7634349030470916,
'p': 0.7638580931263859,
'r': 0.7630121816168328},
{'a': 0.8499701135684399,
'f1': 0.8163862472567666,
'p': 0.7994269340974212,
'r': 0.8340807174887892},
{'a': 0.7581291759465479,
'f1': 0.7517146776406034,
'p': 0.648776637726914,
'r': 0.8934782608695652},
{'a': 0.8777838131450298,
'f1': 0.8659916617033949,
'p': 0.8453488372093023,
'r': 0.8876678876678876},
{'a': 0.758496395468589,
'f1': 0.7159297395517868,
'p': 0.606776180698152,
'r': 0.8729689807976366},
{'a': 0.8103021297672115,
'f1': 0.8004168837936425,
'p': 0.7427466150870407,
'r': 0.8677966101694915},
{'a': 0.877246653919694,
'f1': 0.896551724137931,
'p': 0.8798228969006958,
'r': 0.9139290407358739},
{'a': 0.7289398280802293,
'f1': 0.6266771902131018,
'p': 0.5,
'r': 0.8393234672304439},
{'a': 0.7263533610945866,
'f1': 0.727810650887574,
'p': 0.5896452540747843,
'r': 0.9505409582689336},
{'a': 0.7041499330655957,
'f1': 0.44191919191919193,
'p': 0.30594405594405594,
'r': 0.7954545454545454},
{'a': 0.8116094986807388,
'f1': 0.7698259187620888,
'p': 0.766367137355584,
'r': 0.7733160621761658},
{'a': 0.8488805970149254,
'f1': 0.7996702390766693,
'p': 0.7601880877742947,
'r': 0.8434782608695652},
{'a': 0.8317631224764468,
'f1': 0.7093023255813954,
'p': 0.613682092555332,
'r': 0.8402203856749312},
{'a': 0.8600891861761427,
'f1': 0.8767795778105055,
'p': 0.8703703703703703,
'r': 0.8832838773491593},
{'a': 0.8866200967221923,
'f1': 0.9090909090909092,
'p': 0.9009393680614859,
'r': 0.9173913043478261},
{'a': 0.7409985597695631,
'f1': 0.7998516045260621,
'p': 0.7085113374958922,
'r': 0.9182282793867121},
{'a': 0.7255568138920347,
'f1': 0.7238890998860615,
'p': 0.6192332683560754,
'r': 0.8711151736745887}],
'www.cnn.com;2005': [{'a': 0.7973986993496749,
'f1': 0.6505608283002589,
'p': 0.5568685376661743,
'r': 0.7821576763485477},
{'a': 0.7814922480620154,
'f1': 0.6538756715272448,
'p': 0.534504391468005,
'r': 0.841897233201581},
{'a': 0.8120333772507685,
'f1': 0.7855711422845693,
'p': 0.7101449275362319,
'r': 0.8789237668161435},
{'a': 0.7939339875111507,
'f1': 0.7072243346007605,
'p': 0.6421173762945915,
'r': 0.7870239774330042},
{'a': 0.7925133689839572,
'f1': 0.6040816326530613,
'p': 0.4860426929392447,
'r': 0.7978436657681941},
{'a': 0.8149480415667466,
'f1': 0.7729279058361942,
'p': 0.701067615658363,
'r': 0.8612021857923498},
{'a': 0.7992213570634038,
'f1': 0.6518804243008679,
'p': 0.5425361155698234,
'r': 0.8164251207729468},
{'a': 0.7974481658692185,
'f1': 0.6186186186186187,
'p': 0.5132890365448505,
'r': 0.7783375314861462},
{'a': 0.8134087237479806,
'f1': 0.8023952095808382,
'p': 0.7397476340694006,
'r': 0.8766355140186916},
{'a': 0.8281767955801105,
'f1': 0.5576102418207681,
'p': 0.45794392523364486,
'r': 0.7127272727272728},
{'a': 0.8291413703382481,
'f1': 0.7895299145299146,
'p': 0.7147001934235977,
'r': 0.8818615751789977},
{'a': 0.8012170385395537,
'f1': 0.6512455516014235,
'p': 0.5414201183431953,
'r': 0.8169642857142857},
{'a': 0.7987890079180252,
'f1': 0.6940509915014165,
'p': 0.6041923551171393,
'r': 0.8153078202995009},
{'a': 0.8547993019197208,
'f1': 0.8256496227996648,
'p': 0.8047385620915033,
'r': 0.8476764199655766},
{'a': 0.8202293202293203,
'f1': 0.7898516036381045,
'p': 0.7313829787234043,
'r': 0.858480749219563},
{'a': 0.7980817768803634,
'f1': 0.6563573883161512,
'p': 0.5568513119533528,
'r': 0.799163179916318},
{'a': 0.8632313056954669,
'f1': 0.8067870826491517,
'p': 0.7543500511770727,
'r': 0.8670588235294118},
{'a': 0.7817047817047817,
'f1': 0.7172859450726979,
'p': 0.6195348837209302,
'r': 0.8516624040920716},
{'a': 0.7955215085444903,
'f1': 0.6320254506892895,
'p': 0.5173611111111112,
'r': 0.8119891008174387},
{'a': 0.7862723214285714,
'f1': 0.5379975874547648,
'p': 0.41838649155722324,
'r': 0.7533783783783784},
{'a': 0.8092676872155565,
'f1': 0.7634684453565932,
'p': 0.6914498141263941,
'r': 0.852233676975945},
{'a': 0.8084622383985441,
'f1': 0.7710712343665034,
'p': 0.6910331384015594,
'r': 0.8720787207872078},
{'a': 0.819971870604782,
'f1': 0.7408906882591093,
'p': 0.6428571428571429,
'r': 0.8742038216560509},
{'a': 0.8246376811594203,
'f1': 0.6657458563535911,
'p': 0.5751789976133651,
'r': 0.7901639344262295},
{'a': 0.8191964285714286,
'f1': 0.8329896907216495,
'p': 0.7816473189607518,
'r': 0.8915510718789408}],
'www.cnn.com;2010': [{'a': 0.7275031685678074,
'f1': 0.7248880358285349,
'p': 0.6343784994400896,
'r': 0.8455223880597015},
{'a': 0.6324081020255063,
'f1': 0.5346628679962013,
'p': 0.39900779588944013,
'r': 0.8100719424460432},
{'a': 0.7277505255781359,
'f1': 0.7082238077356365,
'p': 0.6224422442244224,
'r': 0.8214285714285714},
{'a': 0.6537997587454765,
'f1': 0.702127659574468,
'p': 0.5600165562913907,
'r': 0.9408901251738526},
{'a': 0.5586563307493541,
'f1': 0.14600000000000002,
'p': 0.09193954659949623,
'r': 0.35436893203883496},
{'a': 0.6614678899082569,
'f1': 0.4728571428571429,
'p': 0.338100102145046,
'r': 0.7862232779097387},
{'a': 0.45656706045865186,
'f1': 0.3919129082426127,
'p': 0.25237856785177765,
'r': 0.8765217391304347},
{'a': 0.6976923076923077,
'f1': 0.6330532212885154,
'p': 0.5191424196018377,
'r': 0.8110047846889952},
{'a': 0.6745749308026888,
'f1': 0.6304445442299056,
'p': 0.5254491017964071,
'r': 0.7878787878787878},
{'a': 0.7147385103011094,
'f1': 0.7341996455995274,
'p': 0.646049896049896,
'r': 0.8502051983584131},
{'a': 0.6349760139555168,
'f1': 0.39303843364757074,
'p': 0.26965174129353237,
'r': 0.7245989304812834},
{'a': 0.6150234741784038,
'f1': 0.5858585858585859,
'p': 0.4628307433851323,
'r': 0.7979724837074583},
{'a': 0.6288178224937119,
'f1': 0.5872952457051538,
'p': 0.4596622889305816,
'r': 0.8130530973451328},
{'a': 0.660952380952381,
'f1': 0.6959863364645602,
'p': 0.5705285264263213,
'r': 0.8921729611384783},
{'a': 0.21855983772819473,
'f1': 0.10666666666666667,
'p': 0.05723172628304821,
'r': 0.7829787234042553},
{'a': 0.6026184058529072,
'f1': 0.45454545454545453,
'p': 0.3225806451612903,
'r': 0.7692307692307693},
{'a': 0.6807069219440354,
'f1': 0.7090713902308106,
'p': 0.6051305542830967,
'r': 0.8561244329228775},
{'a': 0.605606258148631,
'f1': 0.5437405731523378,
'p': 0.41460609545715926,
'r': 0.7897042716319824},
{'a': 0.7437995397596523,
'f1': 0.7909015025041735,
'p': 0.7159047978843974,
'r': 0.8834498834498834},
{'a': 0.5363106014886341,
'f1': 0.41541973116916053,
'p': 0.28477051460361613,
'r': 0.767572633552015},
{'a': 0.11181766218919692,
'f1': 0.0687691961944715,
'p': 0.03584817244611059,
'r': 0.8422018348623853},
{'a': 0.6964824120603015,
'f1': 0.702950819672131,
'p': 0.6025857223159078,
'r': 0.8434303697875688},
{'a': 0.6620408163265306,
'f1': 0.5460526315789473,
'p': 0.4188393608074012,
'r': 0.784251968503937},
{'a': 0.7014111610006415,
'f1': 0.681491618200479,
'p': 0.5817757009345794,
'r': 0.8224607762180016},
{'a': 0.6079678607298292,
'f1': 0.5402434236356498,
'p': 0.4105011933174224,
'r': 0.7898966704936854}],
'www.cnn.com;2015': [{'a': 0.5461303017052908,
'f1': 0.6239130434782609,
'p': 0.47385800770500824,
'r': 0.9130434782608695},
{'a': 0.30808337569903405,
'f1': 0.3601316408086506,
'p': 0.22635933806146571,
'r': 0.8804597701149425},
{'a': 0.3871693866066404,
'f1': 0.33475870494807575,
'p': 0.21076923076923076,
'r': 0.8130563798219584},
{'a': 0.237528699645168,
'f1': 0.29844440176685233,
'p': 0.1796116504854369,
'r': 0.8819523269012486},
{'a': 0.28893905191873587,
'f1': 0.34402332361516036,
'p': 0.21354705274043434,
'r': 0.8843683083511777},
{'a': 0.487090367428004,
'f1': 0.5143394452280208,
'p': 0.3690958164642375,
'r': 0.8480620155038759},
{'a': 0.44073455759599334,
'f1': 0.4450579790171176,
'p': 0.30142109199700823,
'r': 0.8502109704641351},
{'a': 0.5386666666666666,
'f1': 0.5942142298670837,
'p': 0.4439252336448598,
'r': 0.8983451536643026},
{'a': 0.21576673866090712,
'f1': 0.2719069580910367,
'p': 0.1609304533586518,
'r': 0.875968992248062},
{'a': 0.10892214434551999,
'f1': 0.07365104371799922,
'p': 0.038901601830663615,
'r': 0.6900369003690037},
{'a': 0.30060493252675663,
'f1': 0.3346613545816733,
'p': 0.2074643249176729,
'r': 0.8649885583524027},
{'a': 0.2972493345164153,
'f1': 0.21739130434782608,
'p': 0.12746234067207415,
'r': 0.738255033557047},
{'a': 0.5377104377104377,
'f1': 0.551453773276707,
'p': 0.402479732951836,
'r': 0.8755186721991701},
{'a': 0.6343705799151343,
'f1': 0.731009365244537,
'p': 0.5955913522679102,
'r': 0.9461279461279462},
{'a': 0.4623908663532572,
'f1': 0.5923096511331806,
'p': 0.4293097083794758,
'r': 0.9548440065681445},
{'a': 0.5055798156234838,
'f1': 0.5628485628485629,
'p': 0.40923268870867124,
'r': 0.9010989010989011},
{'a': 0.20300230946882217,
'f1': 0.22848200312989042,
'p': 0.13204134366925063,
'r': 0.8474295190713101},
{'a': 0.40312876052948254,
'f1': 0.36,
'p': 0.23153526970954358,
'r': 0.808695652173913},
{'a': 0.5251872021783526,
'f1': 0.5427728613569321,
'p': 0.39372325249643364,
'r': 0.8734177215189873},
{'a': 0.29772374547335745,
'f1': 0.3710910354412787,
'p': 0.235657546337158,
'r': 0.8725490196078431},
{'a': 0.2304075235109718,
'f1': 0.24980901451489684,
'p': 0.14617791685292802,
'r': 0.8582677165354331},
{'a': 0.6290977208866687,
'f1': 0.7113702623906706,
'p': 0.5729941291585127,
'r': 0.9378603459320948},
{'a': 0.4444444444444444,
'f1': 0.4135188866799205,
'p': 0.27030539311241064,
'r': 0.879492600422833},
{'a': 0.5819144911085887,
'f1': 0.6747129820429792,
'p': 0.526896551724138,
'r': 0.9378068739770867},
{'a': 0.4675090252707581,
'f1': 0.4082246740220662,
'p': 0.27352150537634407,
'r': 0.8043478260869565}],
'www.esquire.com;2000': [{'a': 0.9610738255033557,
'f1': 0.9452830188679244,
'p': 0.9488636363636364,
'r': 0.9417293233082706},
{'a': 0.9625829812914907,
'f1': 0.9585006693440428,
'p': 0.9636608344549125,
'r': 0.9533954727030626},
{'a': 0.9491106719367589,
'f1': 0.9352608422375865,
'p': 0.9649805447470817,
'r': 0.9073170731707317},
{'a': 0.9604743083003953,
'f1': 0.9330357142857143,
'p': 0.9393258426966292,
'r': 0.926829268292683},
{'a': 0.9659798754192621,
'f1': 0.9649382716049383,
'p': 0.9731075697211156,
'r': 0.9569049951028403},
{'a': 0.9618320610687023,
'f1': 0.9403578528827038,
'p': 0.946,
'r': 0.9347826086956522},
{'a': 0.8702734147760326,
'f1': 0.7635206786850478,
'p': 0.7003891050583657,
'r': 0.8391608391608392},
{'a': 0.9437291368621841,
'f1': 0.9423264907135875,
'p': 0.9698189134808853,
'r': 0.9163498098859315},
{'a': 0.9411764705882353,
'f1': 0.8988988988988988,
'p': 0.9432773109243697,
'r': 0.858508604206501},
{'a': 0.954456415279138,
'f1': 0.9556931872320154,
'p': 0.9737864077669903,
'r': 0.9382600561272217},
{'a': 0.9400584795321637,
'f1': 0.9076576576576577,
'p': 0.9372093023255814,
'r': 0.8799126637554585},
{'a': 0.9430379746835443,
'f1': 0.9165964616680706,
'p': 0.9527145359019265,
'r': 0.8831168831168831},
{'a': 0.9679519278918377,
'f1': 0.96529284164859,
'p': 0.9705561613958561,
'r': 0.9600862998921251},
{'a': 0.9479048697621744,
'f1': 0.9159049360146252,
'p': 0.9488636363636364,
'r': 0.8851590106007067},
{'a': 0.9504480759093306,
'f1': 0.9304733727810651,
'p': 0.9588414634146342,
'r': 0.9037356321839081},
{'a': 0.9373088685015291,
'f1': 0.8918205804749341,
'p': 0.9234972677595629,
'r': 0.8622448979591837},
{'a': 0.9499749874937469,
'f1': 0.9528746465598492,
'p': 0.9674641148325359,
'r': 0.9387186629526463},
{'a': 0.950109649122807,
'f1': 0.9334308705193854,
'p': 0.9579579579579579,
'r': 0.9101283880171184},
{'a': 0.9045736871823828,
'f1': 0.842203548085901,
'p': 0.803921568627451,
'r': 0.884313725490196},
{'a': 0.8260325406758448,
'f1': 0.576219512195122,
'p': 0.4833759590792839,
'r': 0.7132075471698113},
{'a': 0.9553314121037464,
'f1': 0.9345991561181434,
'p': 0.9425531914893617,
'r': 0.9267782426778243},
{'a': 0.9464387464387465,
'f1': 0.9426829268292682,
'p': 0.9650436953807741,
'r': 0.9213349225268176},
{'a': 0.943345804382683,
'f1': 0.9206586826347306,
'p': 0.9564541213063764,
'r': 0.8874458874458875},
{'a': 0.1,
'f1': 0.09999999999999999,
'p': 0.05555555555555555,
'r': 0.5},
{'a': 0.9533295389869095,
'f1': 0.9243542435424354,
'p': 0.9488636363636364,
'r': 0.9010791366906474}],
'www.esquire.com;2005': [{'a': 0.9530398322851154,
'f1': 0.9464114832535886,
'p': 0.9611273080660836,
'r': 0.9321394910461829},
{'a': 0.9465422146796776,
'f1': 0.9371884346959123,
'p': 0.9572301425661914,
'r': 0.91796875},
{'a': 0.9585714285714285,
'f1': 0.9390329362298528,
'p': 0.938375350140056,
'r': 0.9396914446002805},
{'a': 0.9636363636363636,
'f1': 0.9542857142857142,
'p': 0.9553775743707094,
'r': 0.9531963470319634},
{'a': 0.9630901287553648,
'f1': 0.9570858283433133,
'p': 0.9609218436873748,
'r': 0.9532803180914513},
{'a': 0.9211159211159211,
'f1': 0.8790560471976402,
'p': 0.8989441930618401,
'r': 0.86002886002886},
{'a': 0.9058993847267462,
'f1': 0.9183417085427136,
'p': 0.8702380952380953,
'r': 0.9720744680851063},
{'a': 0.9181996086105675,
'f1': 0.9040844424047728,
'p': 0.8747779751332149,
'r': 0.9354226020892688},
{'a': 0.9591222030981067,
'f1': 0.9522373051784816,
'p': 0.958502024291498,
'r': 0.9460539460539461},
{'a': 0.9603463992707384,
'f1': 0.9502572898799314,
'p': 0.9432463110102156,
'r': 0.9573732718894009},
{'a': 0.9429404414827155,
'f1': 0.9302798982188295,
'p': 0.9185929648241206,
'r': 0.9422680412371134},
{'a': 0.920041004613019,
'f1': 0.8664383561643836,
'p': 0.840531561461794,
'r': 0.8939929328621908},
{'a': 0.9002638522427441,
'f1': 0.815968841285297,
'p': 0.7688073394495413,
'r': 0.8692946058091287},
{'a': 0.9523595505617978,
'f1': 0.9440928270042194,
'p': 0.9582441113490364,
'r': 0.9303534303534303},
{'a': 0.9564459930313589,
'f1': 0.9479166666666666,
'p': 0.9479166666666666,
'r': 0.9479166666666666},
{'a': 0.8827899298390425,
'f1': 0.8565656565656565,
'p': 0.7969924812030075,
'r': 0.925764192139738},
{'a': 0.9586449626044875,
'f1': 0.9499467518636849,
'p': 0.958109559613319,
'r': 0.941921858500528},
{'a': 0.9640317858636553,
'f1': 0.9595484477892757,
'p': 0.9622641509433962,
'r': 0.9568480300187617},
{'a': 0.9663256606990622,
'f1': 0.9613313754282917,
'p': 0.958984375,
'r': 0.9636898920510304},
{'a': 0.9699303263659699,
'f1': 0.9707142857142858,
'p': 0.9714081486776269,
'r': 0.9700214132762313},
{'a': 0.9461196243203164,
'f1': 0.920611798980335,
'p': 0.9390787518573551,
'r': 0.9028571428571428},
{'a': 0.948937908496732,
'f1': 0.943155979990905,
'p': 0.9308797127468582,
'r': 0.9557603686635945},
{'a': 0.9541052631578948,
'f1': 0.9472665699080792,
'p': 0.9616895874263262,
'r': 0.9332697807435653},
{'a': 0.9571852479864349,
'f1': 0.9505628976994616,
'p': 0.960435212660732,
'r': 0.9408914728682171},
{'a': 0.9658650116369278,
'f1': 0.9645732689210951,
'p': 0.9684721099434115,
'r': 0.9607056936647955}],
'www.esquire.com;2010': [{'a': 0.5643207012116525,
'f1': 0.4433465085638999,
'p': 0.30660592255125285,
'r': 0.8002378121284186},
{'a': 0.6595404595404596,
'f1': 0.711284310403253,
'p': 0.5828936406553735,
'r': 0.9122120817036071},
{'a': 0.4677680596047943,
'f1': 0.20742884708152434,
'p': 0.12092238470191226,
'r': 0.7288135593220338},
{'a': 0.5635330578512396,
'f1': 0.5412595005428882,
'p': 0.3940711462450593,
'r': 0.8639514731369151},
{'a': 0.5767780849459663,
'f1': 0.5699693564862105,
'p': 0.426279602750191,
'r': 0.8597842835130971},
{'a': 0.530896150113232,
'f1': 0.4304791830322074,
'p': 0.2860125260960334,
'r': 0.8698412698412699},
{'a': 0.4075716234652115,
'f1': 0.286652977412731,
'p': 0.1713303878252332,
'r': 0.8768844221105527},
{'a': 0.5689839572192513,
'f1': 0.5470213563132258,
'p': 0.3924731182795699,
'r': 0.9023485784919654},
{'a': 0.5669208519589798,
'f1': 0.5385261978145138,
'p': 0.3895419537900284,
'r': 0.8720508166969148},
{'a': 0.5872011251758087,
'f1': 0.6014935505770536,
'p': 0.4532742155525239,
'r': 0.8937457969065232},
{'a': 0.5289658906334597,
'f1': 0.45861854387056633,
'p': 0.3207136640557006,
'r': 0.8045851528384279},
{'a': 0.5566271700192891,
'f1': 0.5137503777576308,
'p': 0.3648068669527897,
'r': 0.8682328907048008},
{'a': 0.6042534531900899,
'f1': 0.6395046934291992,
'p': 0.49050245098039214,
'r': 0.9185312679288583},
{'a': 0.5003152585119798,
'f1': 0.31355565179731487,
'p': 0.19738276990185388,
'r': 0.7621052631578947},
{'a': 0.5922096657850445,
'f1': 0.5956127801621364,
'p': 0.45237232886635276,
'r': 0.8715980460572226},
{'a': 0.49784791965566716,
'f1': 0.2143658810325477,
'p': 0.12402597402597403,
'r': 0.7892561983471075},
{'a': 0.3755117231112765,
'f1': 0.17583497053045186,
'p': 0.0982436882546652,
'r': 0.8364485981308412},
{'a': 0.5544525547445256,
'f1': 0.5058290155440415,
'p': 0.35164340387212967,
'r': 0.9008073817762399},
{'a': 0.568724279835391,
'f1': 0.5549263873159682,
'p': 0.40312628547922663,
'r': 0.8900999091734787},
{'a': 0.6699975018735949,
'f1': 0.681763430498675,
'p': 0.5465430668211665,
'r': 0.9058898847631242},
{'a': 0.40389294403892945,
'f1': 0.26109435588108576,
'p': 0.15380710659898478,
'r': 0.8632478632478633},
{'a': 0.391304347826087,
'f1': 0.2546583850931677,
'p': 0.1490134994807892,
'r': 0.875},
{'a': 0.6918226600985221,
'f1': 0.7528445006321113,
'p': 0.629492600422833,
'r': 0.9363207547169812},
{'a': 0.5951573849878935,
'f1': 0.6032273374466066,
'p': 0.45785302593659943,
'r': 0.8838664812239221},
{'a': 0.630575117370892,
'f1': 0.6488145048814504,
'p': 0.5043365134431916,
'r': 0.9093041438623924}],
'www.esquire.com;2015': [{'a': 0.28155849110591824,
'f1': 0.01374795417348609,
'p': 0.006949500297835727,
'r': 0.6325301204819277},
{'a': 0.28711102754536055,
'f1': 0.03327383987761346,
'p': 0.017044341409260106,
'r': 0.696},
{'a': 0.27682545695615113,
'f1': 0.014328127016909773,
'p': 0.007239287810604579,
'r': 0.6894409937888198},
{'a': 0.46926977687626775,
'f1': 0.08112379280070238,
'p': 0.04307290695506247,
'r': 0.6957831325301205},
{'a': 0.3115534984047095,
'f1': 0.13151927437641722,
'p': 0.07138900855437258,
'r': 0.8339324227174695},
{'a': 0.3296091814111203,
'f1': 0.1882402484602832,
'p': 0.10571124512238382,
'r': 0.8583773403744599},
{'a': 0.2885415703320078,
'f1': 0.06056905605079986,
'p': 0.03155015584250366,
'r': 0.7549467275494672},
{'a': 0.28600444003589814,
'f1': 0.03177043300025621,
'p': 0.016255899318300997,
'r': 0.6966292134831461},
{'a': 0.3803981623277182,
'f1': 0.17495921696574226,
'p': 0.09766647694934548,
'r': 0.8387096774193549},
{'a': 0.480719397828233,
'f1': 0.5357822453876065,
'p': 0.37498552403011004,
'r': 0.9380069524913094},
{'a': 0.2905982905982906,
'f1': 0.05122118808170405,
'p': 0.026523482986156036,
'r': 0.7441016333938294},
{'a': 0.42642440556303274,
'f1': 0.22177133001927563,
'p': 0.12893712398254098,
'r': 0.7920289855072464},
{'a': 0.28823722302899707,
'f1': 0.04500314267756129,
'p': 0.023218107529671184,
'r': 0.7291242362525459},
{'a': 0.3103961736305388,
'f1': 0.11633420063602197,
'p': 0.06269084564092976,
'r': 0.8060897435897436},
{'a': 0.30111370823594114,
'f1': 0.08463893390959842,
'p': 0.04466265441875198,
'r': 0.8066361556064073},
{'a': 0.36065963688258146,
'f1': 0.0967741935483871,
'p': 0.05160673754629076,
'r': 0.7755834829443446},
{'a': 0.289927787677014,
'f1': 0.05231866825208085,
'p': 0.02706727967363854,
'r': 0.7798507462686567},
{'a': 0.28603752239087393,
'f1': 0.024475074069303104,
'p': 0.012450851900393184,
'r': 0.7142857142857143},
{'a': 0.3594932674687276,
'f1': 0.09215132693393563,
'p': 0.04895608351331893,
'r': 0.783109404990403},
{'a': 0.320497058048652,
'f1': 0.1641911963273022,
'p': 0.090838462917588,
'r': 0.8529741863075196},
{'a': 0.28706446607419944,
'f1': 0.023192887514495556,
'p': 0.011798636601992658,
'r': 0.6766917293233082},
{'a': 0.4193067197045035,
'f1': 0.3059682485779777,
'p': 0.18523848684210525,
'r': 0.878595806923452},
{'a': 0.33102908569192646,
'f1': 0.18479470198675496,
'p': 0.10366759793140344,
'r': 0.8499025341130604},
{'a': 0.32755466504050873,
'f1': 0.1720476241553148,
'p': 0.09607666966157533,
'r': 0.822142491030241},
{'a': 0.35135792460478316,
'f1': 0.058816609810610515,
'p': 0.03058103975535168,
'r': 0.7668711656441718},
{'a': 0.28490255928621744,
'f1': 0.034369055168040584,
'p': 0.01760998115537072,
'r': 0.7112860892388452}],
'www.forbes.com;2000': [{'a': 0.7603195739014648,
'f1': 0.7727272727272727,
'p': 0.6777408637873754,
'r': 0.8986784140969163},
{'a': 0.741304347826087,
'f1': 0.75564681724846,
'p': 0.6216216216216216,
'r': 0.9633507853403142},
{'a': 0.8787784356497351,
'f1': 0.9231073334651118,
'p': 0.8801356954391255,
'r': 0.9704904405652536},
{'a': 0.7747368421052632,
'f1': 0.7995003123048094,
'p': 0.6837606837606838,
'r': 0.9624060150375939},
{'a': 0.6669542709232097,
'f1': 0.5150753768844221,
'p': 0.36541889483065954,
'r': 0.8723404255319149},
{'a': 0.7552631578947369,
'f1': 0.757496740547588,
'p': 0.6579841449603624,
'r': 0.8924731182795699},
{'a': 0.8159443552701979,
'f1': 0.8610662358642972,
'p': 0.7781021897810219,
'r': 0.9638336347197106},
{'a': 0.717391304347826,
'f1': 0.6672550750220653,
'p': 0.5550660792951542,
'r': 0.8362831858407079},
{'a': 0.8219106957424714,
'f1': 0.8670027142303218,
'p': 0.7829131652661064,
'r': 0.9713292788879235},
{'a': 0.9711538461538461,
'f1': 0.9811202013845186,
'p': 0.9755944931163955,
'r': 0.9867088607594937},
{'a': 0.711376404494382,
'f1': 0.6888720666161998,
'p': 0.5963302752293578,
'r': 0.8154121863799283},
{'a': 0.880854252529037,
'f1': 0.9228155339805825,
'p': 0.8740229885057471,
'r': 0.9773778920308483},
{'a': 0.6704361873990307,
'f1': 0.5903614457831325,
'p': 0.49830508474576274,
'r': 0.7241379310344828},
{'a': 0.7071742313323572,
'f1': 0.661590524534687,
'p': 0.5634005763688761,
'r': 0.8012295081967213},
{'a': 0.6441837732160313,
'f1': 0.4468085106382979,
'p': 0.32450331125827814,
'r': 0.7170731707317073},
{'a': 0.7682789651293588,
'f1': 0.8140794223826715,
'p': 0.714172604908947,
'r': 0.9464847848898216},
{'a': 0.8058455114822547,
'f1': 0.8495145631067961,
'p': 0.7658643326039387,
'r': 0.9536784741144414},
{'a': 0.7821052631578947,
'f1': 0.8318440292445167,
'p': 0.7361610352264558,
'r': 0.9561157796451915},
{'a': 0.7606382978723404,
'f1': 0.7844598190526876,
'p': 0.7012369172216937,
'r': 0.8900966183574879},
{'a': 0.6845637583892618,
'f1': 0.36199095022624433,
'p': 0.23904382470119523,
'r': 0.7453416149068323},
{'a': 0.9127272727272727,
'f1': 0.84,
'p': 0.7777777777777778,
'r': 0.9130434782608695},
{'a': 0.7254335260115607,
'f1': 0.6900489396411094,
'p': 0.5834482758620689,
'r': 0.844311377245509},
{'a': 0.8086194302410519,
'f1': 0.8335451080050825,
'p': 0.7446083995459705,
'r': 0.9466089466089466},
{'a': 0.7429359062715368,
'f1': 0.7534699272967614,
'p': 0.641169853768279,
'r': 0.9134615384615384},
{'a': 0.8815298507462687,
'f1': 0.9247778874629812,
'p': 0.8794592564776568,
'r': 0.9750208159866778},
{'a': 0.7494922139471902,
'f1': 0.7787081339712919,
'p': 0.6636085626911316,
'r': 0.9421128798842258},
{'a': 0.733142037302726,
'f1': 0.7138461538461538,
'p': 0.5895806861499364,
'r': 0.9044834307992202},
{'a': 0.9517426273458445,
'f1': 0.9647058823529412,
'p': 0.9669811320754716,
'r': 0.9624413145539906}],
'www.forbes.com;2005': [{'a': 0.922463768115942,
'f1': 0.5868725868725868,
'p': 0.4935064935064935,
'r': 0.7238095238095238},
{'a': 0.893611404435058,
'f1': 0.8388644542183126,
'p': 0.8026013771996939,
'r': 0.8785594639865997},
{'a': 0.923974540311174,
'f1': 0.7174770039421814,
'p': 0.6807980049875312,
'r': 0.7583333333333333},
{'a': 0.8789716926632004,
'f1': 0.7067879636109169,
'p': 0.6242274412855378,
'r': 0.8145161290322581},
{'a': 0.9320754716981132,
'f1': 0.8411764705882353,
'p': 0.8125,
'r': 0.8719512195121951},
{'a': 0.9328429804924848,
'f1': 0.8467153284671532,
'p': 0.8215297450424929,
'r': 0.8734939759036144},
{'a': 0.9169241331960178,
'f1': 0.6620111731843575,
'p': 0.5895522388059702,
'r': 0.7547770700636943},
{'a': 0.9158091674462114,
'f1': 0.7926267281105991,
'p': 0.7510917030567685,
'r': 0.8390243902439024},
{'a': 0.9332394366197183,
'f1': 0.8892005610098177,
'p': 0.879740980573543,
'r': 0.8988657844990549},
{'a': 0.8787878787878788,
'f1': 0.0125,
'p': 0.0078125,
'r': 0.03125},
{'a': 0.9221065909807632,
'f1': 0.6535764375876578,
'p': 0.6348773841961853,
'r': 0.6734104046242775},
{'a': 0.9324227174694465,
'f1': 0.7062499999999999,
'p': 0.6420454545454546,
'r': 0.7847222222222222},
{'a': 0.9324990519529769,
'f1': 0.5972850678733032,
'p': 0.5116279069767442,
'r': 0.717391304347826},
{'a': 0.897003745318352,
'f1': 0.7887323943661972,
'p': 0.7368421052631579,
'r': 0.8484848484848485},
{'a': 0.923697270471464,
'f1': 0.8282122905027932,
'p': 0.8179310344827586,
'r': 0.8387553041018387},
{'a': 0.9231056364315043,
'f1': 0.6199261992619928,
'p': 0.56,
'r': 0.6942148760330579},
{'a': 0.9271175311884439,
'f1': 0.8388969521044993,
'p': 0.8210227272727273,
'r': 0.857566765578635},
{'a': 0.9274785801713586,
'f1': 0.8657223796033995,
'p': 0.854586129753915,
'r': 0.8771526980482205},
{'a': 0.9183731513083049,
'f1': 0.8444444444444444,
'p': 0.8358369098712446,
'r': 0.8532311062431545},
{'a': 0.9288114879315612,
'f1': 0.8535512256442489,
'p': 0.8382716049382716,
'r': 0.8693982074263764},
{'a': 0.8757146408153119,
'f1': 0.7795414462081128,
'p': 0.7163695299837926,
'r': 0.8549323017408124},
{'a': 0.918732350172576,
'f1': 0.778063410454156,
'p': 0.7566666666666667,
'r': 0.800705467372134},
{'a': 0.918977202711029,
'f1': 0.8101083032490974,
'p': 0.7912552891396333,
'r': 0.8298816568047337},
{'a': 0.926164136866478,
'f1': 0.8929503916449086,
'p': 0.9047619047619048,
'r': 0.8814432989690721},
{'a': 0.9195926025194318,
'f1': 0.8584905660377359,
'p': 0.8363970588235294,
'r': 0.8817829457364341}],
'www.forbes.com;2010': [{'a': 0.803030303030303,
'f1': 0.4956896551724137,
'p': 0.4121863799283154,
'r': 0.6216216216216216},
{'a': 0.8355041003376749,
'f1': 0.8311045071817731,
'p': 0.8121974830590513,
'r': 0.8509127789046653},
{'a': 0.834983498349835,
'f1': 0.7706422018348623,
'p': 0.6666666666666666,
'r': 0.9130434782608695},
{'a': 0.8385175380542687,
'f1': 0.7881944444444444,
'p': 0.6941896024464832,
'r': 0.9116465863453815},
{'a': 0.8763654419066534,
'f1': 0.8819345661450925,
'p': 0.8355795148247979,
'r': 0.9337349397590361},
{'a': 0.8413705583756346,
'f1': 0.7990353697749196,
'p': 0.7213352685050798,
'r': 0.8954954954954955},
{'a': 0.8154583582983823,
'f1': 0.7220216606498195,
'p': 0.6279434850863422,
'r': 0.8492569002123143},
{'a': 0.8404907975460123,
'f1': 0.8281249999999999,
'p': 0.7291005291005291,
'r': 0.9582753824756607},
{'a': 0.8345111896348646,
'f1': 0.8068728522336769,
'p': 0.7264851485148515,
'r': 0.9072642967542504},
{'a': 0.8261648745519713,
'f1': 0.742249778565102,
'p': 0.6506211180124224,
'r': 0.8639175257731959},
{'a': 0.8678511937812327,
'f1': 0.8598351001177856,
'p': 0.7883369330453563,
'r': 0.9455958549222798},
{'a': 0.8579756226815051,
'f1': 0.8538713195201745,
'p': 0.7997957099080695,
'r': 0.9157894736842105},
{'a': 0.8235294117647058,
'f1': 0.7715481171548119,
'p': 0.6749633967789166,
'r': 0.900390625},
{'a': 0.8467322151532678,
'f1': 0.8222669349429913,
'p': 0.7512254901960784,
'r': 0.9081481481481481},
{'a': 0.822998193859121,
'f1': 0.7762557077625571,
'p': 0.6737120211360634,
'r': 0.9156193895870736},
{'a': 0.8538390379278445,
'f1': 0.8438735177865613,
'p': 0.7870967741935484,
'r': 0.9094781682641108},
{'a': 0.7989521938441388,
'f1': 0.7039537126325941,
'p': 0.6684981684981685,
'r': 0.7433808553971487},
{'a': 0.8128772635814889,
'f1': 0.7360454115421002,
'p': 0.7021660649819494,
'r': 0.7733598409542743},
{'a': 0.8265830005704506,
'f1': 0.7342657342657343,
'p': 0.6552262090483619,
'r': 0.8349900596421471},
{'a': 0.8058429701765064,
'f1': 0.7498039215686273,
'p': 0.6297760210803689,
'r': 0.9263565891472868},
{'a': 0.8155028827674567,
'f1': 0.7587939698492463,
'p': 0.7365853658536585,
'r': 0.7823834196891192},
{'a': 0.5447540011855364,
'f1': 0.1812366737739872,
'p': 0.10278113663845223,
'r': 0.7657657657657657},
{'a': 0.5952802359882006,
'f1': 0.3466666666666667,
'p': 0.21487603305785125,
'r': 0.896551724137931},
{'a': 0.818087318087318,
'f1': 0.8064159292035399,
'p': 0.7098344693281402,
'r': 0.9334186939820742},
{'a': 0.8540609137055838,
'f1': 0.8275862068965517,
'p': 0.739946380697051,
'r': 0.9387755102040817}],
'www.forbes.com;2015': [{'a': 0.6009918845807033,
'f1': 0.5982750794371311,
'p': 0.45354439091534754,
'r': 0.8786666666666667},
{'a': 0.5228988424760946,
'f1': 0.4397163120567376,
'p': 0.30194805194805197,
'r': 0.808695652173913},
{'a': 0.5684647302904564,
'f1': 0.4474616292798111,
'p': 0.31530782029950083,
'r': 0.7703252032520326},
{'a': 0.5637982195845698,
'f1': 0.486013986013986,
'p': 0.347789824854045,
'r': 0.8065764023210832},
{'a': 0.5737037037037037,
'f1': 0.5218113834648941,
'p': 0.38456827924066134,
'r': 0.8113695090439277},
{'a': 0.5261813537675607,
'f1': 0.5360566902876198,
'p': 0.3793510324483776,
'r': 0.9133522727272727},
{'a': 0.6267262388302194,
'f1': 0.6455842653297338,
'p': 0.5097442143727162,
'r': 0.8801261829652997},
{'a': 0.6165389527458492,
'f1': 0.5712245626561941,
'p': 0.45454545454545453,
'r': 0.7684918347742555},
{'a': 0.43635551585529253,
'f1': 0.3984747378455672,
'p': 0.2570725707257073,
'r': 0.885593220338983},
{'a': 0.3153623188405797,
'f1': 0.2385557704706641,
'p': 0.13941220798794274,
'r': 0.8258928571428571},
{'a': 0.5164212910532276,
'f1': 0.3200636942675159,
'p': 0.20447609359104782,
'r': 0.7362637362637363},
{'a': 0.56483191725157,
'f1': 0.602832097100472,
'p': 0.4554253693326541,
'r': 0.8913260219341974},
{'a': 0.608122179798681,
'f1': 0.6417010472865756,
'p': 0.5017369727047146,
'r': 0.8899647887323944},
{'a': 0.5861520095503382,
'f1': 0.607250755287009,
'p': 0.4612736660929432,
'r': 0.8883977900552487},
{'a': 0.5475171232876712,
'f1': 0.5086006508600651,
'p': 0.36321381142098275,
'r': 0.8480620155038759},
{'a': 0.5645315487571702,
'f1': 0.48791455874086564,
'p': 0.3472,
'r': 0.8204158790170132},
{'a': 0.5972944849115505,
'f1': 0.5044814340588989,
'p': 0.3835171966255678,
'r': 0.7369077306733167},
{'a': 0.5224932249322494,
'f1': 0.36297903109182933,
'p': 0.23882017126546146,
'r': 0.7560240963855421},
{'a': 0.49003466204506063,
'f1': 0.46862302483069984,
'p': 0.31860036832412525,
'r': 0.8856655290102389},
{'a': 0.5722679200940071,
'f1': 0.5997067448680351,
'p': 0.4487109160724081,
'r': 0.9038674033149171},
{'a': 0.41139240506329117,
'f1': 0.26235509456985967,
'p': 0.15774027879677183,
'r': 0.7789855072463768},
{'a': 0.5893101873001371,
'f1': 0.5466464952092789,
'p': 0.40813253012048195,
'r': 0.8274809160305343},
{'a': 0.36214185063410054,
'f1': 0.09345794392523364,
'p': 0.050468637346791634,
'r': 0.6306306306306306},
{'a': 0.5615592435353145,
'f1': 0.5968772178850248,
'p': 0.44263157894736843,
'r': 0.9161220043572985},
{'a': 0.5688073394495413,
'f1': 0.531405782652044,
'p': 0.3867924528301887,
'r': 0.8487261146496815}],
'www.foxnews.com;2000': [{'a': 0.9518828451882845,
'f1': 0.9187279151943463,
'p': 0.9285714285714286,
'r': 0.9090909090909091},
{'a': 0.960635359116022,
'f1': 0.8800000000000001,
'p': 0.9330357142857143,
'r': 0.8326693227091634},
{'a': 0.9295958279009127,
'f1': 0.9021739130434783,
'p': 0.8691099476439791,
'r': 0.9378531073446328},
{'a': 0.9138438880706922,
'f1': 0.8956289027653881,
'p': 0.899641577060932,
'r': 0.8916518650088809},
{'a': 0.9568106312292359,
'f1': 0.8987012987012987,
'p': 0.9301075268817204,
'r': 0.8693467336683417},
{'a': 0.9374437443744374,
'f1': 0.9506567270145545,
'p': 0.926002766251729,
'r': 0.9766593727206418},
{'a': 0.7709205020920502,
'f1': 0.7818725099601594,
'p': 0.6618887015177066,
'r': 0.9549878345498783},
{'a': 0.9540372670807453,
'f1': 0.9357638888888891,
'p': 0.9373913043478261,
'r': 0.9341421143847487},
{'a': 0.9479315263908702,
'f1': 0.9557575757575759,
'p': 0.9651162790697675,
'r': 0.946578631452581},
{'a': 0.9376609994848016,
'f1': 0.8826382153249273,
'p': 0.9191919191919192,
'r': 0.8488805970149254},
{'a': 0.966804979253112,
'f1': 0.9065420560747663,
'p': 0.9326923076923077,
'r': 0.8818181818181818},
{'a': 0.8981636060100167,
'f1': 0.8539505187549881,
'p': 0.816793893129771,
'r': 0.8946488294314381},
{'a': 0.9306184012066365,
'f1': 0.8696883852691218,
'p': 0.8319783197831978,
'r': 0.9109792284866469},
{'a': 0.9382022471910112,
'f1': 0.9022222222222221,
'p': 0.8638297872340426,
'r': 0.9441860465116279},
{'a': 0.2631578947368421,
'f1': 0.3,
'p': 0.17647058823529413,
'r': 1.0},
{'a': 0.9135060129509713,
'f1': 0.9034589571502323,
'p': 0.8901322482197355,
'r': 0.9171907756813418},
{'a': 0.9339651482726995,
'f1': 0.9274680993955676,
'p': 0.933739012846518,
'r': 0.9212808539026017},
{'a': 0.9387078961899503,
'f1': 0.9285254346426272,
'p': 0.9327296248382924,
'r': 0.9243589743589744},
{'a': 0.933705512909979,
'f1': 0.8966267682263331,
'p': 0.865546218487395,
'r': 0.9300225733634312},
{'a': 0.9861636951882701,
'f1': 0.9925436806766332,
'p': 0.9959797498511018,
'r': 0.9891312384473198},
{'a': 0.9393139841688655,
'f1': 0.936986301369863,
'p': 0.9173819742489271,
'r': 0.9574468085106383},
{'a': 0.8996683250414593,
'f1': 0.9202373104812129,
'p': 0.8914431673052363,
'r': 0.9509536784741145},
{'a': 0.9659790083242852,
'f1': 0.9745533297238765,
'p': 0.9787928221859706,
'r': 0.9703504043126685},
{'a': 0.949358059914408,
'f1': 0.9095541401273886,
'p': 0.9037974683544304,
'r': 0.9153846153846154},
{'a': 0.9705240174672489,
'f1': 0.9608695652173913,
'p': 0.9822222222222222,
'r': 0.9404255319148936}],
'www.foxnews.com;2005': [{'a': 0.6034149484536082,
'f1': 0.6691749529696318,
'p': 0.5075417855686915,
'r': 0.9818611987381703},
{'a': 0.442833607907743,
'f1': 0.45749117741418033,
'p': 0.30135249366018596,
'r': 0.9494007989347537},
{'a': 0.458528951486698,
'f1': 0.4917743830787309,
'p': 0.33466613354658137,
'r': 0.9269102990033222},
{'a': 0.4705693148922483,
'f1': 0.5036188178528348,
'p': 0.3471933471933472,
'r': 0.9165751920965971},
{'a': 0.4147383410466358,
'f1': 0.3946980854197349,
'p': 0.24907063197026022,
'r': 0.950354609929078},
{'a': 0.3608128834355828,
'f1': 0.2672527472527472,
'p': 0.15565796210957503,
'r': 0.9440993788819876},
{'a': 0.551033386327504,
'f1': 0.6097291321171918,
'p': 0.44565656565656564,
'r': 0.9650043744531933},
{'a': 0.599594868332208,
'f1': 0.3762272089761571,
'p': 0.24183006535947713,
'r': 0.8468823993685872},
{'a': 0.7557954127315099,
'f1': 0.8411140371877743,
'p': 0.7352120535714286,
'r': 0.9826589595375722},
{'a': 0.5216294160057678,
'f1': 0.30194634402945814,
'p': 0.1810725552050473,
'r': 0.9082278481012658},
{'a': 0.5412064570943076,
'f1': 0.55,
'p': 0.38573933372296904,
'r': 0.9579100145137881},
{'a': 0.45275779376498804,
'f1': 0.3491158014831717,
'p': 0.21549295774647886,
'r': 0.918918918918919},
{'a': 0.46584641493423845,
'f1': 0.4211494252873563,
'p': 0.27213309566250743,
'r': 0.9308943089430894},
{'a': 0.3951965065502183,
'f1': 0.29228410832907514,
'p': 0.17470983506414173,
'r': 0.89375},
{'a': 0.47703180212014135,
'f1': 0.5151876116736153,
'p': 0.35262943334692215,
'r': 0.9558011049723757},
{'a': 0.456710653363373,
'f1': 0.48661800486618007,
'p': 0.3270645952575634,
'r': 0.9501187648456056},
{'a': 0.46838258659040355,
'f1': 0.5013412816691505,
'p': 0.33966074313408723,
'r': 0.9567690557451649},
{'a': 0.6817651632970451,
'f1': 0.7405294024409574,
'p': 0.5974424552429668,
'r': 0.9737390579408086},
{'a': 0.39481946624803765,
'f1': 0.3248686514886165,
'p': 0.19619249074563722,
'r': 0.9440203562340967},
{'a': 0.40449775112443775,
'f1': 0.41895845523698066,
'p': 0.26957831325301207,
'r': 0.9396325459317585},
{'a': 0.6685860524632118,
'f1': 0.7033218785796106,
'p': 0.5576748410535877,
'r': 0.951937984496124},
{'a': 0.46245186136071886,
'f1': 0.491962390051562,
'p': 0.33183306055646483,
'r': 0.9507620164126612},
{'a': 0.401333737496211,
'f1': 0.408859622867405,
'p': 0.2614854517611026,
'r': 0.9368998628257887},
{'a': 0.5186202686202687,
'f1': 0.5137218624730188,
'p': 0.35192226446979297,
'r': 0.9509132420091324},
{'a': 0.46610716591349255,
'f1': 0.4969586374695864,
'p': 0.3362139917695473,
'r': 0.9522144522144522}],
'www.foxnews.com;2010': [{'a': 0.7668161434977578,
'f1': 0.3764988009592326,
'p': 0.24881141045958796,
'r': 0.7733990147783252},
{'a': 0.7757424368581738,
'f1': 0.6937073540561032,
'p': 0.5988219895287958,
'r': 0.8243243243243243},
{'a': 0.2345960748516659,
'f1': 0.17103311913000496,
'p': 0.09495060373216246,
'r': 0.8606965174129353},
{'a': 0.7676311030741411,
'f1': 0.32189973614775724,
'p': 0.20854700854700856,
'r': 0.7052023121387283},
{'a': 0.7303617099013519,
'f1': 0.5472392638036809,
'p': 0.40879926672777267,
'r': 0.8274582560296846},
{'a': 0.7756373937677054,
'f1': 0.6793522267206478,
'p': 0.5883590462833099,
'r': 0.803639846743295},
{'a': 0.7619047619047619,
'f1': 0.5780474351902922,
'p': 0.45565217391304347,
'r': 0.7903469079939668},
{'a': 0.7622270070747462,
'f1': 0.6082108464267614,
'p': 0.5050505050505051,
'r': 0.7643312101910829},
{'a': 0.7117411850236278,
'f1': 0.507147296457427,
'p': 0.36298932384341637,
'r': 0.8412371134020619},
{'a': 0.7625284738041003,
'f1': 0.6472081218274112,
'p': 0.5379746835443038,
'r': 0.8121019108280255},
{'a': 0.7522093813732155,
'f1': 0.4774193548387096,
'p': 0.3501577287066246,
'r': 0.75},
{'a': 0.760662671836447,
'f1': 0.4270042194092827,
'p': 0.3092909535452323,
'r': 0.6893732970027248},
{'a': 0.749707145646232,
'f1': 0.4784377542717656,
'p': 0.35336538461538464,
'r': 0.7405541561712846},
{'a': 0.8027233477250083,
'f1': 0.7522935779816513,
'p': 0.6424501424501424,
'r': 0.9074446680080482},
{'a': 0.7579972183588317,
'f1': 0.6481294236602629,
'p': 0.5136217948717948,
'r': 0.8780821917808219},
{'a': 0.7645959831854273,
'f1': 0.3471502590673575,
'p': 0.22521008403361345,
'r': 0.7570621468926554},
{'a': 0.7599640395564878,
'f1': 0.607545320921117,
'p': 0.5,
'r': 0.7740324594257179},
{'a': 0.811549368393916,
'f1': 0.8121305576972501,
'p': 0.7301293900184843,
'r': 0.9148812970469021},
{'a': 0.7687253613666228,
'f1': 0.38028169014084506,
'p': 0.2583732057416268,
'r': 0.72},
{'a': 0.7672496025437202,
'f1': 0.7031630170316301,
'p': 0.5776149233844103,
'r': 0.8984455958549222},
{'a': 0.7780074410913601,
'f1': 0.5251989389920424,
'p': 0.3907894736842105,
'r': 0.8005390835579514},
{'a': 0.7532252729077076,
'f1': 0.5155844155844156,
'p': 0.3899803536345776,
'r': 0.7605363984674329},
{'a': 0.7881653607133208,
'f1': 0.7731481481481481,
'p': 0.6816326530612244,
'r': 0.893048128342246},
{'a': 0.7628689087165408,
'f1': 0.46805234795996925,
'p': 0.3370288248337029,
'r': 0.7657430730478589},
{'a': 0.7885294117647059,
'f1': 0.7472759226713532,
'p': 0.6545566502463054,
'r': 0.8705978705978706}],
'www.foxnews.com;2015': [{'a': 0.8563569682151589,
'f1': 0.8865282472235635,
'p': 0.8652214891611687,
'r': 0.9089108910891089},
{'a': 0.8018691588785046,
'f1': 0.7188328912466844,
'p': 0.6878172588832487,
'r': 0.7527777777777778},
{'a': 0.8519888991674376,
'f1': 0.6444444444444445,
'p': 0.6223175965665236,
'r': 0.6682027649769585},
{'a': 0.7843719090009891,
'f1': 0.6812865497076024,
'p': 0.589873417721519,
'r': 0.8062283737024222},
{'a': 0.8609422492401215,
'f1': 0.8042780748663102,
'p': 0.7752577319587629,
'r': 0.8355555555555556},
{'a': 0.8248520710059172,
'f1': 0.672566371681416,
'p': 0.5984251968503937,
'r': 0.7676767676767676},
{'a': 0.826963906581741,
'f1': 0.7433070866141732,
'p': 0.6685552407932012,
'r': 0.8368794326241135},
{'a': 0.8463476070528967,
'f1': 0.7328467153284672,
'p': 0.7011173184357542,
'r': 0.7675840978593272},
{'a': 0.8742857142857143,
'f1': 0.8811524609843938,
'p': 0.8706998813760379,
'r': 0.8918590522478737},
{'a': 0.8710010319917441,
'f1': 0.5954692556634303,
'p': 0.5227272727272727,
'r': 0.6917293233082706},
{'a': 0.8536170212765958,
'f1': 0.8093126385809313,
'p': 0.8039647577092511,
'r': 0.8147321428571429},
{'a': 0.8363201911589009,
'f1': 0.766609880749574,
'p': 0.7009345794392523,
'r': 0.8458646616541353},
{'a': 0.8519900497512438,
'f1': 0.8344923504867872,
'p': 0.8075370121130552,
'r': 0.8633093525179856},
{'a': 0.8399339933993399,
'f1': 0.8283185840707965,
'p': 0.7878787878787878,
'r': 0.8731343283582089},
{'a': 0.8649334178820546,
'f1': 0.8924785461887934,
'p': 0.8700787401574803,
'r': 0.9160621761658031},
{'a': 0.8697394789579158,
'f1': 0.8959167333867094,
'p': 0.8952,
'r': 0.8966346153846154},
{'a': 0.7922077922077922,
'f1': 0.6363636363636364,
'p': 0.5645161290322581,
'r': 0.7291666666666666},
{'a': 0.8699234844025897,
'f1': 0.9002257336343116,
'p': 0.8815207780725022,
'r': 0.9197416974169742},
{'a': 0.853763440860215,
'f1': 0.84012539184953,
'p': 0.8271604938271605,
'r': 0.8535031847133758},
{'a': 0.8404864091559371,
'f1': 0.8631062001227747,
'p': 0.8155452436194895,
'r': 0.9165580182529335},
{'a': 0.8048780487804879,
'f1': 0.8079999999999998,
'p': 0.7917133258678611,
'r': 0.8249708284714119},
{'a': 0.8483572030328559,
'f1': 0.8369565217391305,
'p': 0.8048780487804879,
'r': 0.8716981132075472},
{'a': 0.8672086720867209,
'f1': 0.8122605363984674,
'p': 0.8153846153846154,
'r': 0.8091603053435115},
{'a': 0.8849701573521432,
'f1': 0.8921668362156663,
'p': 0.8921668362156663,
'r': 0.8921668362156663},
{'a': 0.7781094527363184,
'f1': 0.6836879432624113,
'p': 0.5863746958637469,
'r': 0.8197278911564626},
{'a': 0.8762641284949435,
'f1': 0.8725490196078431,
'p': 0.8651275820170109,
'r': 0.8800988875154512}],
'www.latimes.com;2000': [{'a': 0.8859138533178114,
'f1': 0.9079812206572769,
'p': 0.8719567177637512,
'r': 0.9471106758080313},
{'a': 0.9060402684563759,
'f1': 0.9296754250386399,
'p': 0.9011235955056179,
'r': 0.960095770151636},
{'a': 0.8181818181818182,
'f1': 0.7275541795665633,
'p': 0.6167979002624672,
'r': 0.8867924528301887},
{'a': 0.8636176349402555,
'f1': 0.8996665656259473,
'p': 0.8436611711199545,
'r': 0.9636363636363636},
{'a': 0.9081803005008348,
'f1': 0.9397590361445783,
'p': 0.9081287044877223,
'r': 0.9736722650930549},
{'a': 0.8790149892933619,
'f1': 0.9011373578302712,
'p': 0.865546218487395,
'r': 0.9397810218978102},
{'a': 0.8671875,
'f1': 0.8919262555626193,
'p': 0.8565323565323565,
'r': 0.9303713527851459},
{'a': 0.8974439886399496,
'f1': 0.8914132976946207,
'p': 0.8567758509955041,
'r': 0.9289693593314763},
{'a': 0.906754772393539,
'f1': 0.9151069518716578,
'p': 0.8912760416666666,
'r': 0.9402472527472527},
{'a': 0.864039408866995,
'f1': 0.8217054263565892,
'p': 0.7718446601941747,
'r': 0.8784530386740331},
{'a': 0.95949263502455,
'f1': 0.9440993788819876,
'p': 0.9675925925925926,
'r': 0.9217199558985667},
{'a': 0.8910433979686058,
'f1': 0.9293695131683959,
'p': 0.8885921404044258,
'r': 0.9740694270179842},
{'a': 0.8336025848142165,
'f1': 0.851227732306211,
'p': 0.7864768683274022,
'r': 0.9275970619097587},
{'a': 0.8811685748124753,
'f1': 0.9156153630501823,
'p': 0.8850948509485095,
'r': 0.9483159117305459},
{'a': 0.879980563654033,
'f1': 0.9096892138939672,
'p': 0.8723702664796634,
'r': 0.9503437738731857},
{'a': 0.8948170731707317,
'f1': 0.9210827296988181,
'p': 0.884981684981685,
'r': 0.9602543720190779},
{'a': 0.8715647784632642,
'f1': 0.9174477289113193,
'p': 0.8724717175179979,
'r': 0.9673128088179399},
{'a': 0.8863366336633663,
'f1': 0.9081011847582452,
'p': 0.8807453416149068,
'r': 0.9372108393919365},
{'a': 0.8540250447227191,
'f1': 0.9017341040462428,
'p': 0.8482102401449932,
'r': 0.9624678663239075},
{'a': 0.8820047355958959,
'f1': 0.9161290322580645,
'p': 0.8850948509485095,
'r': 0.9494186046511628},
{'a': 0.875943000838223,
'f1': 0.9118522930315663,
'p': 0.870380898237635,
'r': 0.957473420888055},
{'a': 0.8548465660009742,
'f1': 0.8876319758672699,
'p': 0.8306280875088214,
'r': 0.9530364372469635},
{'a': 0.867056856187291,
'f1': 0.8451801363193768,
'p': 0.8097014925373134,
'r': 0.8839103869653768},
{'a': 0.8775137111517367,
'f1': 0.9103678929765886,
'p': 0.8635786802030457,
'r': 0.9625176803394625},
{'a': 0.9000886786875554,
'f1': 0.9353481254781943,
'p': 0.9015486725663717,
'r': 0.9717806041335453}],
'www.latimes.com;2005': [{'a': 0.7422196124486201,
'f1': 0.7298461538461539,
'p': 0.6161038961038962,
'r': 0.8950943396226415},
{'a': 0.6051838456901748,
'f1': 0.4910644910644911,
'p': 0.3484013230429989,
'r': 0.8315789473684211},
{'a': 0.7498252969951084,
'f1': 0.7250384024577572,
'p': 0.6059050064184852,
'r': 0.9024856596558317},
{'a': 0.7478488589599701,
'f1': 0.7092320966350302,
'p': 0.5796897038081805,
'r': 0.9133333333333333},
{'a': 0.779495990836197,
'f1': 0.7187728268809349,
'p': 0.5992691839220463,
'r': 0.8978102189781022},
{'a': 0.9032732622287606,
'f1': 0.927488282326992,
'p': 0.9241758241758242,
'r': 0.9308245711123408},
{'a': 0.8685015290519877,
'f1': 0.7754569190600522,
'p': 0.7156626506024096,
'r': 0.8461538461538461},
{'a': 0.7058096415327565,
'f1': 0.7202194357366772,
'p': 0.6038107752956636,
'r': 0.8922330097087379},
{'a': 0.7935819601040763,
'f1': 0.8212318477716575,
'p': 0.7263064658990257,
'r': 0.9447004608294931},
{'a': 0.7500845451471085,
'f1': 0.8024592354985296,
'p': 0.7117117117117117,
'r': 0.9197303921568627},
{'a': 0.7272727272727273,
'f1': 0.7387698686938493,
'p': 0.619351100811124,
'r': 0.9152397260273972},
{'a': 0.8605957446808511,
'f1': 0.9013134112543679,
'p': 0.8558352402745996,
'r': 0.9518961567828964},
{'a': 0.7504501260352899,
'f1': 0.7099204688154039,
'p': 0.5812200137080192,
'r': 0.9118279569892473},
{'a': 0.7527058051820269,
'f1': 0.7320540156361051,
'p': 0.6311274509803921,
'r': 0.871404399323181},
{'a': 0.7738570113531759,
'f1': 0.7661059980958426,
'p': 0.6675884955752213,
'r': 0.8987341772151899},
{'a': 0.7750533049040512,
'f1': 0.7287917737789202,
'p': 0.6169749727965179,
'r': 0.8901098901098901},
{'a': 0.8182175107970161,
'f1': 0.84127528282482,
'p': 0.7649625935162094,
'r': 0.9345011424219345},
{'a': 0.7622868605817452,
'f1': 0.7208480565371025,
'p': 0.6169354838709677,
'r': 0.8668555240793201},
{'a': 0.6897179253867152,
'f1': 0.6960784313725491,
'p': 0.5606604450825556,
'r': 0.917743830787309},
{'a': 0.6222222222222222,
'f1': 0.3751178133836004,
'p': 0.24968632371392724,
'r': 0.7537878787878788},
{'a': 0.6855524079320113,
'f1': 0.6961678832116787,
'p': 0.5593841642228738,
'r': 0.9214975845410628},
{'a': 0.8013661202185792,
'f1': 0.851844304055431,
'p': 0.7766629505759941,
'r': 0.9431407942238267},
{'a': 0.9086802194256212,
'f1': 0.9246739419749801,
'p': 0.9234449760765551,
'r': 0.92590618336887},
{'a': 0.732839313572543,
'f1': 0.7519014849692142,
'p': 0.6306196840826246,
'r': 0.9309417040358744},
{'a': 0.6179577464788732,
'f1': 0.354806739345887,
'p': 0.2315653298835705,
'r': 0.7584745762711864}],
'www.latimes.com;2010': [{'a': 0.5850843444806155,
'f1': 0.49166062364031904,
'p': 0.3570300157977883,
'r': 0.789289871944121},
{'a': 0.49572649572649574,
'f1': 0.41124886604172967,
'p': 0.27287319422150885,
'r': 0.8343558282208589},
{'a': 0.6794190577399929,
'f1': 0.3728343728343728,
'p': 0.2642436149312377,
'r': 0.6329411764705882},
{'a': 0.7071513002364066,
'f1': 0.47704485488126647,
'p': 0.3772954924874791,
'r': 0.648493543758967},
{'a': 0.8660617059891107,
'f1': 0.8762990278243379,
'p': 0.8378205128205128,
'r': 0.9184820801124385},
{'a': 0.8609794628751974,
'f1': 0.8613081166272655,
'p': 0.813849590469099,
'r': 0.9146443514644351},
{'a': 0.5482108713466266,
'f1': 0.4580602883355177,
'p': 0.3210840606338999,
'r': 0.7988571428571428},
{'a': 0.7263668192835981,
'f1': 0.49653121902874137,
'p': 0.3691967575534267,
'r': 0.7579425113464447},
{'a': 0.8486257928118394,
'f1': 0.8380090497737557,
'p': 0.7860780984719864,
'r': 0.8972868217054264},
{'a': 0.8374751491053678,
'f1': 0.7872478854912167,
'p': 0.7092614302461899,
'r': 0.8845029239766082},
{'a': 0.5851926977687627,
'f1': 0.1452455590386625,
'p': 0.08128654970760234,
'r': 0.6813725490196079},
{'a': 0.8460222412318221,
'f1': 0.8331788693234478,
'p': 0.779705117085863,
'r': 0.8945273631840795},
{'a': 0.7058642922935217,
'f1': 0.5801928133216476,
'p': 0.4652143359100492,
'r': 0.770663562281723},
{'a': 0.574838388861263,
'f1': 0.5037724898432966,
'p': 0.36717428087986465,
'r': 0.8022181146025879},
{'a': 0.63409915356711,
'f1': 0.41013645224171547,
'p': 0.2743870631194575,
'r': 0.8117283950617284},
{'a': 0.5813497619714366,
'f1': 0.27532719340765877,
'p': 0.1658878504672897,
'r': 0.8091168091168092},
{'a': 0.7512280701754386,
'f1': 0.5599006828057107,
'p': 0.41566820276497696,
'r': 0.8574144486692015},
{'a': 0.5737658674188999,
'f1': 0.5066927848514529,
'p': 0.3653483992467043,
'r': 0.8264110756123536},
{'a': 0.44818136522172397,
'f1': 0.33860853986264555,
'p': 0.21283783783783783,
'r': 0.8277372262773722},
{'a': 0.5393258426966292,
'f1': 0.43914415994387934,
'p': 0.30023980815347723,
'r': 0.8172323759791122},
{'a': 0.8528493364558938,
'f1': 0.8547206165703276,
'p': 0.811265544989027,
'r': 0.9030944625407166},
{'a': 0.5451306413301663,
'f1': 0.44046749452154854,
'p': 0.3015,
'r': 0.8170731707317073},
{'a': 0.6951649055395454,
'f1': 0.5310344827586208,
'p': 0.41945525291828795,
'r': 0.723489932885906},
{'a': 0.8481414324569356,
'f1': 0.8239621650026274,
'p': 0.7574879227053141,
'r': 0.9032258064516129},
{'a': 0.7052851597491788,
'f1': 0.2595648912228057,
'p': 0.1610800744878957,
'r': 0.667953667953668}],
'www.latimes.com;2015': [{'a': 0.5938778389053463,
'f1': 0.1715107913669065,
'p': 0.09600515463917526,
'r': 0.8032345013477089},
{'a': 0.63568345323741,
'f1': 0.18637532133676094,
'p': 0.10681399631675875,
'r': 0.7304785894206549},
{'a': 0.5944452121044632,
'f1': 0.19522895530573073,
'p': 0.11069651741293532,
'r': 0.8259860788863109},
{'a': 0.311409056412851,
'f1': 0.35004775549188155,
'p': 0.21673565937315198,
'r': 0.9094292803970223},
{'a': 0.23751617076326004,
'f1': 0.21643180005317736,
'p': 0.12378345498783455,
'r': 0.8604651162790697},
{'a': 0.5991861648016277,
'f1': 0.13758599124452783,
'p': 0.07498295841854125,
'r': 0.8333333333333334},
{'a': 0.3306508875739645,
'f1': 0.3897280966767372,
'p': 0.24814509480626545,
'r': 0.907537688442211},
{'a': 0.603215251102575,
'f1': 0.1875910282551704,
'p': 0.10595590654820665,
'r': 0.817258883248731},
{'a': 0.5912418842381545,
'f1': 0.19832023841777296,
'p': 0.11244239631336406,
'r': 0.8394495412844036},
{'a': 0.6033519553072626,
'f1': 0.292358803986711,
'p': 0.1753487048107031,
'r': 0.8787446504992867},
{'a': 0.580749718151071,
'f1': 0.12164157071154416,
'p': 0.06592,
'r': 0.7862595419847328},
{'a': 0.5854936959909336,
'f1': 0.13534278959810875,
'p': 0.07360977177756349,
'r': 0.8388278388278388},
{'a': 0.23728315201411349,
'f1': 0.19089207735495947,
'p': 0.10778443113772455,
'r': 0.8337874659400545},
{'a': 0.5976490582070528,
'f1': 0.17484751670055182,
'p': 0.09798177083333333,
'r': 0.8113207547169812},
{'a': 0.601409666283084,
'f1': 0.15851806863042817,
'p': 0.08805668016194332,
'r': 0.7933130699088146},
{'a': 0.5995661605206074,
'f1': 0.1482620732082436,
'p': 0.08166723144696712,
'r': 0.8033333333333333},
{'a': 0.6116892373485389,
'f1': 0.18491921005385994,
'p': 0.10492359932088285,
'r': 0.7783375314861462},
{'a': 0.59974993053626,
'f1': 0.20786362386582347,
'p': 0.11954459203036052,
'r': 0.7957894736842105},
{'a': 0.5972012621758814,
'f1': 0.22818086225026288,
'p': 0.13183475091130012,
'r': 0.84765625},
{'a': 0.2567389875082183,
'f1': 0.19565990750622553,
'p': 0.11079774375503626,
'r': 0.8358662613981763},
{'a': 0.6091758708581139,
'f1': 0.22384701912260968,
'p': 0.1285529715762274,
'r': 0.8652173913043478},
{'a': 0.608569161597461,
'f1': 0.31417979610750696,
'p': 0.19093213179386087,
'r': 0.8862745098039215},
{'a': 0.27805978567399886,
'f1': 0.26857142857142857,
'p': 0.15921409214092141,
'r': 0.8576642335766423},
{'a': 0.2214304565848509,
'f1': 0.1873278236914601,
'p': 0.10559006211180125,
'r': 0.8292682926829268},
{'a': 0.23664980326025858,
'f1': 0.20631209818819407,
'p': 0.11719787516600266,
'r': 0.8609756097560975}],
'www.nymag.com;2000': [{'a': 0.9425414364640884,
'f1': 0.9440860215053763,
'p': 0.9251844046364595,
'r': 0.9637760702524698},
{'a': 0.9427288040426727,
'f1': 0.9430803571428572,
'p': 0.9224890829694323,
'r': 0.9646118721461188},
{'a': 0.9402366863905326,
'f1': 0.9463051568314725,
'p': 0.9368421052631579,
'r': 0.9559613319011815},
{'a': 0.9270248596631917,
'f1': 0.9103448275862068,
'p': 0.8733459357277883,
'r': 0.9506172839506173},
{'a': 0.9404255319148936,
'f1': 0.851851851851852,
'p': 0.8341968911917098,
'r': 0.8702702702702703},
{'a': 0.9549382716049383,
'f1': 0.9581181870338497,
'p': 0.9619815668202765,
'r': 0.9542857142857143},
{'a': 0.9580137262817925,
'f1': 0.9697146185206756,
'p': 0.9714119019836639,
'r': 0.9680232558139535},
{'a': 0.9394673123486683,
'f1': 0.9440089585666294,
'p': 0.9366666666666666,
'r': 0.9514672686230248},
{'a': 0.9344746162927982,
'f1': 0.9410515135422199,
'p': 0.9267782426778243,
'r': 0.9557713052858684},
{'a': 0.5105755041810133,
'f1': 0.4271732872769142,
'p': 0.28277439024390244,
'r': 0.8729411764705882},
{'a': 0.9386454183266932,
'f1': 0.9216683621566633,
'p': 0.9114688128772636,
'r': 0.9320987654320988},
{'a': 0.627173213135866,
'f1': 0.5021496130696474,
'p': 0.34803337306317045,
'r': 0.9012345679012346},
{'a': 0.9426644182124789,
'f1': 0.9506292352371732,
'p': 0.9370229007633588,
'r': 0.9646365422396856},
{'a': 0.9452054794520548,
'f1': 0.9559902200488998,
'p': 0.9630541871921182,
'r': 0.9490291262135923},
{'a': 0.9378813089295619,
'f1': 0.946360153256705,
'p': 0.9285714285714286,
'r': 0.96484375},
{'a': 0.9535490605427975,
'f1': 0.9616213885295387,
'p': 0.948936170212766,
'r': 0.9746503496503497},
{'a': 0.6271008403361344,
'f1': 0.5862470862470862,
'p': 0.4280851063829787,
'r': 0.9297597042513863},
{'a': 0.9449612403100776,
'f1': 0.9086229086229086,
'p': 0.9145077720207254,
'r': 0.9028132992327366},
{'a': 0.9414389291689905,
'f1': 0.9415041782729805,
'p': 0.9378468368479467,
'r': 0.9451901565995525},
{'a': 0.9498181818181818,
'f1': 0.9332042594385286,
'p': 0.9323017408123792,
'r': 0.9341085271317829},
{'a': 0.9301221166892809,
'f1': 0.9286209286209286,
'p': 0.8993288590604027,
'r': 0.9598853868194842},
{'a': 0.9354838709677419,
'f1': 0.932415519399249,
'p': 0.9085365853658537,
'r': 0.9575835475578406},
{'a': 0.9311145510835913,
'f1': 0.9187214611872146,
'p': 0.8902654867256637,
'r': 0.9490566037735849},
{'a': 0.9412225705329154,
'f1': 0.9006622516556291,
'p': 0.9139784946236559,
'r': 0.8877284595300261},
{'a': 0.9436519258202568,
'f1': 0.9399239543726237,
'p': 0.9507692307692308,
'r': 0.9293233082706767}],
'www.nymag.com;2005': [{'a': 0.7711069418386491,
'f1': 0.7621832358674464,
'p': 0.6729776247848537,
'r': 0.8786516853932584},
{'a': 0.7747963584091998,
'f1': 0.7693817468105986,
'p': 0.6782006920415224,
'r': 0.8888888888888888},
{'a': 0.7776712985146143,
'f1': 0.7600827300930713,
'p': 0.6693989071038251,
'r': 0.8791866028708134},
{'a': 0.7718120805369127,
'f1': 0.7603036876355748,
'p': 0.6594543744120414,
'r': 0.8975672215108835},
{'a': 0.772093023255814,
'f1': 0.7591480065537956,
'p': 0.6575212866603595,
'r': 0.8979328165374677},
{'a': 0.6690590111642744,
'f1': 0.5300113250283126,
'p': 0.3848684210526316,
'r': 0.850909090909091},
{'a': 0.6889952153110048,
'f1': 0.6470131885182312,
'p': 0.4970202622169249,
'r': 0.9266666666666666},
{'a': 0.7529296875,
'f1': 0.7548449612403101,
'p': 0.6629787234042553,
'r': 0.876265466816648},
{'a': 0.7890173410404624,
'f1': 0.7859237536656892,
'p': 0.7030430220356768,
'r': 0.8909574468085106},
{'a': 0.8201140487299119,
'f1': 0.8394261915779732,
'p': 0.7699490662139219,
'r': 0.9226856561546287},
{'a': 0.7540029112081513,
'f1': 0.7907552620718118,
'p': 0.6794326241134752,
'r': 0.945705824284304},
{'a': 0.7821091505949939,
'f1': 0.8270921523933572,
'p': 0.7345286292654714,
'r': 0.9463487332339792},
{'a': 0.7581395348837209,
'f1': 0.6584564860426929,
'p': 0.5976154992548435,
'r': 0.7330895795246801},
{'a': 0.6695778748180495,
'f1': 0.6828132277596646,
'p': 0.5461997019374069,
'r': 0.9105590062111801},
{'a': 0.697495183044316,
'f1': 0.7186379928315413,
'p': 0.5905743740795287,
'r': 0.9176201372997712},
{'a': 0.7282120395327942,
'f1': 0.7632093933463796,
'p': 0.6482712765957447,
'r': 0.9276879162702188},
{'a': 0.8176943699731903,
'f1': 0.8482142857142857,
'p': 0.7840440165061898,
'r': 0.9238249594813615},
{'a': 0.7697462900909526,
'f1': 0.7593796898449224,
'p': 0.6704946996466431,
'r': 0.8754325259515571},
{'a': 0.7224770642201835,
'f1': 0.7547628698824482,
'p': 0.6363636363636364,
'r': 0.9272908366533864},
{'a': 0.7748896517900932,
'f1': 0.7610619469026549,
'p': 0.6688014638609332,
'r': 0.8828502415458938},
{'a': 0.7994902293967715,
'f1': 0.814026792750197,
'p': 0.7405017921146954,
'r': 0.9037620297462817},
{'a': 0.7799607072691552,
'f1': 0.7812499999999999,
'p': 0.6884681583476764,
'r': 0.9029345372460497},
{'a': 0.6740623349181194,
'f1': 0.6716338477913784,
'p': 0.531592249368155,
'r': 0.911849710982659},
{'a': 0.7434108527131783,
'f1': 0.602641056422569,
'p': 0.47992351816443596,
'r': 0.8096774193548387},
{'a': 0.6736401673640168,
'f1': 0.6729559748427673,
'p': 0.5358931552587646,
'r': 0.9042253521126761},
{'a': 0.768056968463886,
'f1': 0.7894736842105263,
'p': 0.6979591836734694,
'r': 0.9086078639744952}],
'www.nymag.com;2010': [{'a': 0.48481943112815595,
'f1': 0.0627906976744186,
'p': 0.037241379310344824,
'r': 0.2},
{'a': 0.44631901840490795,
'f1': 0.33419402434526,
'p': 0.20798898071625344,
'r': 0.849906191369606},
{'a': 0.3217094017094017,
'f1': 0.036893203883495145,
'p': 0.01954732510288066,
'r': 0.3275862068965517},
{'a': 0.311042524005487,
'f1': 0.02899951667472209,
'p': 0.015511892450879007,
'r': 0.2222222222222222},
{'a': 0.4183240952070427,
'f1': 0.22904062229904926,
'p': 0.1347914547304171,
'r': 0.7614942528735632},
{'a': 0.3187355943365163,
'f1': 0.1510053344275749,
'p': 0.08329560887279312,
'r': 0.8070175438596491},
{'a': 0.41282778171509565,
'f1': 0.043854587420657815,
'p': 0.023944549464398234,
'r': 0.2602739726027397},
{'a': 0.43229657555765,
'f1': 0.2786427145708583,
'p': 0.16941747572815535,
'r': 0.7842696629213484},
{'a': 0.3127237227465018,
'f1': 0.14146341463414633,
'p': 0.0777479892761394,
'r': 0.7837837837837838},
{'a': 0.30776762402088775,
'f1': 0.1360488798370672,
'p': 0.07435440783615316,
'r': 0.7990430622009569},
{'a': 0.5344157329064715,
'f1': 0.5479573712255773,
'p': 0.3932441045251753,
'r': 0.9033674963396779},
{'a': 0.3160771704180064,
'f1': 0.15829046299960425,
'p': 0.08798944126704795,
'r': 0.7874015748031497},
{'a': 0.3274732850741124,
'f1': 0.03938946331856228,
'p': 0.021197668256491786,
'r': 0.2777777777777778},
{'a': 0.309208290859667,
'f1': 0.031443544545021435,
'p': 0.016516516516516516,
'r': 0.32673267326732675},
{'a': 0.3213815789473684,
'f1': 0.16104107360715736,
'p': 0.0894713059195662,
'r': 0.8048780487804879},
{'a': 0.32691658223573117,
'f1': 0.15658061785865424,
'p': 0.08714083843617522,
'r': 0.7708333333333334},
{'a': 0.4134419551934827,
'f1': 0.187206020696143,
'p': 0.1067024128686327,
'r': 0.7624521072796935},
{'a': 0.4195666447800394,
'f1': 0.23130434782608694,
'p': 0.13516260162601626,
'r': 0.8012048192771084},
{'a': 0.5790219702338767,
'f1': 0.5123152709359605,
'p': 0.3659824046920821,
'r': 0.853625170998632},
{'a': 0.48916909149692855,
'f1': 0.05952380952380952,
'p': 0.03333333333333333,
'r': 0.2777777777777778},
{'a': 0.3778471138845554,
'f1': 0.3018207282913165,
'p': 0.18340425531914895,
'r': 0.8517786561264822},
{'a': 0.3234536082474227,
'f1': 0.16930379746835442,
'p': 0.09460654288240496,
'r': 0.8045112781954887},
{'a': 0.4106593782029382,
'f1': 0.18207681365576103,
'p': 0.10339256865912763,
'r': 0.7619047619047619},
{'a': 0.40123034859876966,
'f1': 0.04782608695652174,
'p': 0.026112759643916916,
'r': 0.2838709677419355},
{'a': 0.5376782077393075,
'f1': 0.5942806076854334,
'p': 0.44127405441274054,
'r': 0.9097127222982216}],
'www.nymag.com;2015': [{'a': 0.12131556489201077,
'f1': 0.0027319011548491245,
'p': 0.0013710582076529975,
'r': 0.36666666666666664},
{'a': 0.12269175361243288,
'f1': 0.003965008797363269,
'p': 0.001994266483858906,
'r': 0.33613445378151263},
{'a': 0.12294602844710008,
'f1': 0.02455937590291823,
'p': 0.01246913278403951,
'r': 0.8082408874801902},
{'a': 0.16712910070181167,
'f1': 0.05854498493327594,
'p': 0.030300136859861868,
'r': 0.8631006346328196},
{'a': 0.25470154326426825,
'f1': 0.25441998690374257,
'p': 0.14750459948018574,
'r': 0.9245835621453414},
{'a': 0.17664334917498797,
'f1': 0.0838928168260947,
'p': 0.04405891163255117,
'r': 0.8748451053283767},
{'a': 0.12178293724674187,
'f1': 0.0027855845996965704,
'p': 0.0013984966161376521,
'r': 0.34146341463414637},
{'a': 0.12372770769899956,
'f1': 0.005528816487720596,
'p': 0.0027873870734924466,
'r': 0.33532934131736525},
{'a': 0.12206695969734742,
'f1': 0.003672912271994044,
'p': 0.0018477826608070316,
'r': 0.29959514170040485},
{'a': 0.12218016322779686,
'f1': 0.0030812812166090995,
'p': 0.0015481035731229244,
'r': 0.31958762886597936},
{'a': 0.12040199256052729,
'f1': 0.01787622656174099,
'p': 0.009036884239477433,
'r': 0.8177777777777778},
{'a': 0.15947986577181208,
'f1': 0.004504355314145663,
'p': 0.002267724938304542,
'r': 0.3285024154589372},
{'a': 0.12283227537464826,
'f1': 0.004061818902318208,
'p': 0.002044123145955378,
'r': 0.31417624521072796},
{'a': 0.131848751352114,
'f1': 0.02022239324858682,
'p': 0.010238357563217155,
'r': 0.8141025641025641},
{'a': 0.294921875,
'f1': 0.34880605811648296,
'p': 0.21353597200962168,
'r': 0.9516616314199395},
{'a': 0.12296532587559265,
'f1': 0.004612294492523619,
'p': 0.0023225033089428865,
'r': 0.3274647887323944},
{'a': 0.12218213621952553,
'f1': 0.003573999156139088,
'p': 0.001795914294978923,
'r': 0.36},
{'a': 0.627518315018315,
'f1': 0.7415409054805402,
'p': 0.6018566271273853,
'r': 0.965659908978072},
{'a': 0.12394710426395496,
'f1': 0.005351568306823249,
'p': 0.002696965913347484,
'r': 0.34069400630914826},
{'a': 0.12461045612046985,
'f1': 0.0060868489422244215,
'p': 0.003071928071928072,
'r': 0.328},
{'a': 0.16345886410413307,
'f1': 0.009158293526601878,
'p': 0.004635187408296652,
'r': 0.3787465940054496},
{'a': 0.1570371188687583,
'f1': 0.030863021527910078,
'p': 0.01572306696861857,
'r': 0.8321917808219178},
{'a': 0.12216138391396349,
'f1': 0.003220133263976617,
'p': 0.001618002140741294,
'r': 0.3282828282828283},
{'a': 0.12024096385542168,
'f1': 0.0014917580368464236,
'p': 0.0007477380922708806,
'r': 0.3},
{'a': 0.12138728323699421,
'f1': 0.0031796502384737677,
'p': 0.001597444089456869,
'r': 0.3333333333333333},
{'a': 0.1540937213883417,
'f1': 0.02350831118086026,
'p': 0.011924876527164023,
'r': 0.8210290827740492}]},
{'entertainment.msn.com;2000': [{'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
{'a': 0.8793650793650793,
'f1': 0.7610062893081762,
'p': 0.7034883720930233,
'r': 0.8287671232876712},
{'a': 0.8937994722955145,
'f1': 0.866390041493776,
'p': 0.8143525741029641,
'r': 0.925531914893617},
{'a': 0.8584288052373159,
'f1': 0.7232,
'p': 0.6108108108108108,
'r': 0.8862745098039215},
{'a': 0.9196428571428571,
'f1': 0.8461538461538463,
'p': 0.8181818181818182,
'r': 0.8761061946902655},
{'a': 0.9419152276295133,
'f1': 0.87987012987013,
'p': 0.9093959731543624,
'r': 0.8522012578616353},
{'a': 0.9002808988764045,
'f1': 0.8820598006644519,
'p': 0.8415213946117274,
'r': 0.9267015706806283},
{'a': 0.8494897959183674,
'f1': 0.6844919786096256,
'p': 0.5630498533724341,
'r': 0.8727272727272727},
{'a': 0.8653998416468726,
'f1': 0.7803617571059432,
'p': 0.6926605504587156,
'r': 0.893491124260355},
{'a': 0.8411867364746946,
'f1': 0.6239669421487603,
'p': 0.49834983498349833,
'r': 0.8342541436464088},
{'a': 0.8753501400560224,
'f1': 0.8536184210526315,
'p': 0.783987915407855,
'r': 0.9368231046931408},
{'a': 0.9394205443371378,
'f1': 0.8899521531100478,
'p': 0.8637770897832817,
'r': 0.9177631578947368},
{'a': 0.9521082769390942,
'f1': 0.9530612244897959,
'p': 0.9648760330578512,
'r': 0.9415322580645161},
{'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
{'a': 0.9365397461589846,
'f1': 0.8938547486033519,
'p': 0.8888888888888888,
'r': 0.898876404494382},
{'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
{'a': 0.9323636363636364,
'f1': 0.8695652173913044,
'p': 0.8587257617728532,
'r': 0.8806818181818182},
{'a': 0.8506151142355008,
'f1': 0.698581560283688,
'p': 0.5915915915915916,
'r': 0.8528138528138528},
{'a': 0.8543778801843318,
'f1': 0.6504424778761062,
'p': 0.5287769784172662,
'r': 0.8448275862068966},
{'a': 0.8528072837632777,
'f1': 0.750642673521851,
'p': 0.6561797752808989,
'r': 0.8768768768768769},
{'a': 0.8962336664104535,
'f1': 0.8609680741503605,
'p': 0.8116504854368932,
'r': 0.9166666666666666},
{'a': 0.902315484804631,
'f1': 0.8802129547471162,
'p': 0.830820770519263,
'r': 0.9358490566037736},
{'a': 0.9497374343585896,
'f1': 0.9130998702983139,
'p': 0.9263157894736842,
'r': 0.9002557544757033},
{'a': 0.9299820466786356,
'f1': 0.904019688269073,
'p': 0.9168053244592346,
'r': 0.8915857605177994},
{'a': 0.9568904593639576,
'f1': 0.9653802497162315,
'p': 0.9714448886350657,
'r': 0.9593908629441624}],
'entertainment.msn.com;2005': [{'a': 0.6705118961788031,
'f1': 0.6788475052705552,
'p': 0.5337016574585636,
'r': 0.9324324324324325},
{'a': 0.5303454715219421,
'f1': 0.20285261489698891,
'p': 0.11895910780669144,
'r': 0.6881720430107527},
{'a': 0.40272108843537413,
'f1': 0.23251748251748253,
'p': 0.13434343434343435,
'r': 0.8636363636363636},
{'a': 0.6301639344262295,
'f1': 0.5559055118110235,
'p': 0.39887005649717516,
'r': 0.9168831168831169},
{'a': 0.6722689075630253,
'f1': 0.6871657754010695,
'p': 0.5444915254237288,
'r': 0.9311594202898551},
{'a': 0.616135328562134,
'f1': 0.5674486803519062,
'p': 0.419284940411701,
'r': 0.8775510204081632},
{'a': 0.6080983369486623,
'f1': 0.5758998435054773,
'p': 0.423963133640553,
'r': 0.8975609756097561},
{'a': 0.6859838274932615,
'f1': 0.7119901112484549,
'p': 0.5708622398414271,
'r': 0.9458128078817734},
{'a': 0.660300136425648,
'f1': 0.587748344370861,
'p': 0.43773119605425403,
'r': 0.8942065491183879},
{'a': 0.7899616368286445,
'f1': 0.8586792858679285,
'p': 0.7632887189292543,
'r': 0.9813176007866273},
{'a': 0.7529249827942188,
'f1': 0.8308199811498587,
'p': 0.7252159605100782,
'r': 0.9724214009928296},
{'a': 0.6455199476782211,
'f1': 0.6377005347593583,
'p': 0.48377281947261663,
'r': 0.9352941176470588},
{'a': 0.6298076923076923,
'f1': 0.6264726264726265,
'p': 0.4767932489451477,
'r': 0.9131313131313131},
{'a': 0.7221974095578383,
'f1': 0.7770609318996415,
'p': 0.6541943270971635,
'r': 0.9567519858781994},
{'a': 0.5383469843633656,
'f1': 0.411764705882353,
'p': 0.26889714993804215,
'r': 0.8785425101214575},
{'a': 0.5951717734447539,
'f1': 0.35311572700296734,
'p': 0.22580645161290322,
'r': 0.8095238095238095},
{'a': 0.6649253731343283,
'f1': 0.6671608598962195,
'p': 0.5172413793103449,
'r': 0.9394572025052192},
{'a': 0.4439288476411446,
'f1': 0.24712041884816754,
'p': 0.145679012345679,
'r': 0.8137931034482758},
{'a': 0.6280388978930308,
'f1': 0.5800548947849954,
'p': 0.4232309746328438,
'r': 0.9215116279069767},
{'a': 0.6438356164383562,
'f1': 0.629746835443038,
'p': 0.47664670658682634,
'r': 0.9277389277389277},
{'a': 0.552689756816507,
'f1': 0.33223322332233224,
'p': 0.20798898071625344,
'r': 0.825136612021858},
{'a': 0.5778097982708934,
'f1': 0.5274193548387097,
'p': 0.3737142857142857,
'r': 0.8958904109589041},
{'a': 0.6257272139625081,
'f1': 0.5415676959619952,
'p': 0.38687782805429866,
'r': 0.9023746701846965},
{'a': 0.6899942163100058,
'f1': 0.7032115171650054,
'p': 0.5614500442086648,
'r': 0.9407407407407408},
{'a': 0.6089181286549707,
'f1': 0.5868725868725869,
'p': 0.4323094425483504,
'r': 0.9134615384615384},
{'a': 0.40759678597516436,
'f1': 0.20568070519098924,
'p': 0.11797752808988764,
'r': 0.8015267175572519},
{'a': 0.6303763440860215,
'f1': 0.638157894736842,
'p': 0.4801980198019802,
'r': 0.9509803921568627}],
'entertainment.msn.com;2010': [{'a': 0.32477492497499166,
'f1': 0.16769420468557336,
'p': 0.09264305177111716,
'r': 0.8831168831168831},
{'a': 0.4359816152330926,
'f1': 0.4124487004103968,
'p': 0.2681191640729213,
'r': 0.8933333333333333},
{'a': 0.5924591072913779,
'f1': 0.5242718446601942,
'p': 0.3720716582452917,
'r': 0.8871851040525739},
{'a': 0.507501630789302,
'f1': 0.5379436964504284,
'p': 0.3753202391118702,
'r': 0.949244060475162},
{'a': 0.3220740103270224,
'f1': 0.31722643553629465,
'p': 0.1913225300575013,
'r': 0.9277566539923955},
{'a': 0.49795584627964024,
'f1': 0.29988597491448116,
'p': 0.18063186813186813,
'r': 0.8825503355704698},
{'a': 0.138558232549281,
'f1': 0.10859282617264485,
'p': 0.05777498165041418,
'r': 0.9018003273322422},
{'a': 0.729470802919708,
'f1': 0.6268093140339837,
'p': 0.5107692307692308,
'r': 0.8110749185667753},
{'a': 0.4536752136752137,
'f1': 0.4516129032258065,
'p': 0.2974683544303797,
'r': 0.9373219373219374},
{'a': 0.47837058399423216,
'f1': 0.4759145237232887,
'p': 0.31846825012118274,
'r': 0.9412607449856734},
{'a': 0.7559787422497786,
'f1': 0.686039886039886,
'p': 0.5755258126195029,
'r': 0.849083215796897},
{'a': 0.3456439393939394,
'f1': 0.24425811155668975,
'p': 0.14152936206168146,
'r': 0.8909574468085106},
{'a': 0.17442764578833694,
'f1': 0.18879456706281836,
'p': 0.10504439826185528,
'r': 0.931323283082077},
{'a': 0.3945793734600493,
'f1': 0.320695102685624,
'p': 0.19623006283228614,
'r': 0.8768898488120951},
{'a': 0.7394551845342706,
'f1': 0.6513815402704292,
'p': 0.5321805955811719,
'r': 0.8393939393939394},
{'a': 0.42480049109883367,
'f1': 0.41909485430874144,
'p': 0.2694300518134715,
'r': 0.9428172942817294},
{'a': 0.7938435470206157,
'f1': 0.8259418216499761,
'p': 0.7462300732442912,
'r': 0.9247197010144154},
{'a': 0.1341623994147769,
'f1': 0.13504823151125403,
'p': 0.07277309600693077,
'r': 0.9361702127659575},
{'a': 0.6128633271490415,
'f1': 0.4440497335701598,
'p': 0.2972651605231867,
'r': 0.8771929824561403},
{'a': 0.7444128403088176,
'f1': 0.6921194322075379,
'p': 0.5852649006622517,
'r': 0.8467065868263473},
{'a': 0.6936281640965959,
'f1': 0.7520602778431834,
'p': 0.6199534161490683,
'r': 0.9557151406343507},
{'a': 0.45454545454545453,
'f1': 0.45895522388059706,
'p': 0.30609705516383245,
'r': 0.9167701863354037},
{'a': 0.765868491068035,
'f1': 0.7458745874587458,
'p': 0.6536514822848879,
'r': 0.8683957732949087},
{'a': 0.36902160718621024,
'f1': 0.3183844741673223,
'p': 0.19312758510976774,
'r': 0.9059701492537313},
{'a': 0.7229783827061649,
'f1': 0.6614481409001957,
'p': 0.5509372453137734,
'r': 0.8274173806609547}],
'entertainment.msn.com;2015': [{'a': 0.5653213465952563,
'f1': 0.2877291960507757,
'p': 0.17216804201050262,
'r': 0.8751191611058151},
{'a': 0.5324997416554718,
'f1': 0.08086143843965866,
'p': 0.042421658494990404,
'r': 0.8614718614718615},
{'a': 0.5521454112038141,
'f1': 0.2023704227843623,
'p': 0.114652234916817,
'r': 0.8614457831325302},
{'a': 0.5348189415041783,
'f1': 0.08035896389965327,
'p': 0.042274678111587985,
'r': 0.8106995884773662},
{'a': 0.5325474115838031,
'f1': 0.08507223113964689,
'p': 0.04488672453948762,
'r': 0.8122605363984674},
{'a': 0.5308124680633622,
'f1': 0.08564031069508066,
'p': 0.045158580130224744,
'r': 0.8269230769230769},
{'a': 0.4705931917762049,
'f1': 0.17361567802183347,
'p': 0.09579100145137881,
'r': 0.9256661991584852},
{'a': 0.5321897282215563,
'f1': 0.06640544442153022,
'p': 0.03457903780068729,
'r': 0.8341968911917098},
{'a': 0.5278260869565218,
'f1': 0.109024612579763,
'p': 0.05815989107177592,
'r': 0.8691860465116279},
{'a': 0.5267447784004076,
'f1': 0.07377866400797606,
'p': 0.038622129436325675,
'r': 0.8222222222222222},
{'a': 0.44264614840364375,
'f1': 0.08026853473438413,
'p': 0.04208754208754209,
'r': 0.8647798742138365},
{'a': 0.5371387871387872,
'f1': 0.11549679175578458,
'p': 0.062017122572562124,
'r': 0.8389830508474576},
{'a': 0.5385151968111609,
'f1': 0.1590702741964772,
'p': 0.08728577122359506,
'r': 0.8957055214723927},
{'a': 0.5157987126974839,
'f1': 0.10909743405706084,
'p': 0.05837173579109063,
'r': 0.8328767123287671},
{'a': 0.4932993582483956,
'f1': 0.04821840099273179,
'p': 0.024840182648401828,
'r': 0.8192771084337349},
{'a': 0.5023167075497411,
'f1': 0.18263205013428827,
'p': 0.1021021021021021,
'r': 0.864406779661017},
{'a': 0.5346044453808069,
'f1': 0.047023295944779976,
'p': 0.02422222222222222,
'r': 0.8014705882352942},
{'a': 0.4744559633884375,
'f1': 0.130696740305886,
'p': 0.07026578073089701,
'r': 0.9337748344370861},
{'a': 0.4932264834955161,
'f1': 0.08916323731138545,
'p': 0.04696531791907514,
'r': 0.8783783783783784},
{'a': 0.48101155057752887,
'f1': 0.16287932251235002,
'p': 0.08991740688795387,
'r': 0.8637724550898204},
{'a': 0.5566702819956616,
'f1': 0.32372811250517025,
'p': 0.19864636209813874,
'r': 0.874162323157111},
{'a': 0.5400783414540938,
'f1': 0.1694271911663216,
'p': 0.09375596715676915,
'r': 0.8783542039355993},
{'a': 0.5258629519614872,
'f1': 0.05933753302174354,
'p': 0.030782205355260384,
'r': 0.8202247191011236},
{'a': 0.5253086419753087,
'f1': 0.0560556464811784,
'p': 0.0290438838244647,
'r': 0.8011695906432749},
{'a': 0.5436151937392633,
'f1': 0.18423746161719548,
'p': 0.10283755475147591,
'r': 0.88379705400982}],
'news.bbc.co.uk;2000': [{'a': 0.7107309486780715,
'f1': 0.6395348837209303,
'p': 0.4903417533432392,
'r': 0.9192200557103064},
{'a': 0.6795146324054246,
'f1': 0.6191687871077184,
'p': 0.46378653113087676,
'r': 0.9311224489795918},
{'a': 0.7925501432664757,
'f1': 0.8124352331606217,
'p': 0.7088607594936709,
'r': 0.9514563106796117},
{'a': 0.7884745762711864,
'f1': 0.7877551020408163,
'p': 0.67561260210035,
'r': 0.9445350734094616},
{'a': 0.7935444579780755,
'f1': 0.8011730205278593,
'p': 0.7019527235354573,
'r': 0.9330601092896175},
{'a': 0.6966434452184928,
'f1': 0.6774410774410775,
'p': 0.5289169295478444,
'r': 0.9419475655430711},
{'a': 0.7463263727764888,
'f1': 0.6796875,
'p': 0.5559105431309904,
'r': 0.8743718592964824},
{'a': 0.7112462006079028,
'f1': 0.6666666666666667,
'p': 0.521978021978022,
'r': 0.9223300970873787},
{'a': 0.6960352422907489,
'f1': 0.6072106261859583,
'p': 0.4584527220630373,
'r': 0.898876404494382},
{'a': 0.787661406025825,
'f1': 0.7771084337349398,
'p': 0.6632390745501285,
'r': 0.9381818181818182},
{'a': 0.7012601927353596,
'f1': 0.6398570151921358,
'p': 0.4890710382513661,
'r': 0.9250645994832042},
{'a': 0.7168998923573735,
'f1': 0.7251828631138975,
'p': 0.5998271391529818,
'r': 0.916776750330251},
{'a': 0.6901521216973578,
'f1': 0.5913410770855333,
'p': 0.4381846635367762,
'r': 0.9090909090909091},
{'a': 0.712130356065178,
'f1': 0.7071823204419889,
'p': 0.5686080947680158,
'r': 0.935064935064935},
{'a': 0.7368421052631579,
'f1': 0.6928327645051193,
'p': 0.553133514986376,
'r': 0.9269406392694064},
{'a': 0.72893533638145,
'f1': 0.7120055517002081,
'p': 0.5738255033557047,
'r': 0.9378427787934186},
{'a': 0.7259358288770054,
'f1': 0.6898638426626325,
'p': 0.5527272727272727,
'r': 0.9175050301810865},
{'a': 0.7441451990632318,
'f1': 0.7543563799887577,
'p': 0.6330188679245283,
'r': 0.933240611961057},
{'a': 0.7573770491803279,
'f1': 0.7814960629921259,
'p': 0.6622185154295246,
'r': 0.9531812725090036},
{'a': 0.6982665222101841,
'f1': 0.7085295656724229,
'p': 0.578138343296328,
'r': 0.9148648648648648},
{'a': 0.7276190476190476,
'f1': 0.7172050098879368,
'p': 0.5887445887445888,
'r': 0.9173693086003373},
{'a': 0.7326139088729017,
'f1': 0.7385697538100819,
'p': 0.6069364161849711,
'r': 0.9431137724550899},
{'a': 0.764438676184296,
'f1': 0.7659574468085106,
'p': 0.6421621621621622,
'r': 0.9488817891373802},
{'a': 0.7251962883654532,
'f1': 0.6778242677824269,
'p': 0.5335968379446641,
'r': 0.9288990825688074},
{'a': 0.7048748353096179,
'f1': 0.6786226685796269,
'p': 0.5332581736189402,
'r': 0.9329388560157791}],
'news.bbc.co.uk;2005': [{'a': 0.7055555555555556,
'f1': 0.577127659574468,
'p': 0.41811175337186895,
'r': 0.9313304721030042},
{'a': 0.665641813989239,
'f1': 0.5853193517635844,
'p': 0.4228650137741047,
'r': 0.9504643962848297},
{'a': 0.6544943820224719,
'f1': 0.6648501362397821,
'p': 0.5115303983228512,
'r': 0.9494163424124513},
{'a': 0.7070552147239264,
'f1': 0.6879084967320263,
'p': 0.5376756066411239,
'r': 0.9546485260770975},
{'a': 0.7396265560165975,
'f1': 0.7805944055944056,
'p': 0.6499272197962155,
'r': 0.9770240700218819},
{'a': 0.7469802924348379,
'f1': 0.7596618357487923,
'p': 0.629,
'r': 0.9588414634146342},
{'a': 0.8263618677042801,
'f1': 0.8626394767218161,
'p': 0.7773925104022191,
'r': 0.9688850475367329},
{'a': 0.6912280701754386,
'f1': 0.6599690880989181,
'p': 0.5053254437869823,
'r': 0.9510022271714922},
{'a': 0.6645418326693228,
'f1': 0.5646328852119958,
'p': 0.40564635958395245,
'r': 0.9285714285714286},
{'a': 0.7045840407470289,
'f1': 0.7342158859470468,
'p': 0.5963606286186931,
'r': 0.9549668874172186},
{'a': 0.7180212014134275,
'f1': 0.7015706806282723,
'p': 0.5603345280764636,
'r': 0.938},
{'a': 0.676737160120846,
'f1': 0.6130198915009042,
'p': 0.4538152610441767,
'r': 0.9442896935933147},
{'a': 0.6666666666666666,
'f1': 0.6536144578313253,
'p': 0.49885057471264366,
'r': 0.9475982532751092},
{'a': 0.6914811972371451,
'f1': 0.6378378378378379,
'p': 0.48626373626373626,
'r': 0.9267015706806283},
{'a': 0.6484429065743945,
'f1': 0.5687606112054329,
'p': 0.4075425790754258,
'r': 0.9410112359550562},
{'a': 0.6762589928057554,
'f1': 0.5954045954045954,
'p': 0.43440233236151604,
'r': 0.946031746031746},
{'a': 0.6634920634920635,
'f1': 0.5655737704918032,
'p': 0.40350877192982454,
'r': 0.9452054794520548},
{'a': 0.7060570071258907,
'f1': 0.7348687734333155,
'p': 0.5949696444058976,
'r': 0.9607843137254902},
{'a': 0.5640096618357487,
'f1': 0.44631901840490795,
'p': 0.2942366026289181,
'r': 0.9238095238095239},
{'a': 0.751412429378531,
'f1': 0.7775530839231547,
'p': 0.6528013582342954,
'r': 0.96125},
{'a': 0.7913641574321743,
'f1': 0.8446215139442231,
'p': 0.7427427427427428,
'r': 0.978891820580475},
{'a': 0.6632,
'f1': 0.521046643913538,
'p': 0.361198738170347,
'r': 0.9346938775510204},
{'a': 0.7311492459698388,
'f1': 0.7562470532767562,
'p': 0.625585023400936,
'r': 0.9558998808104887},
{'a': 0.618230563002681,
'f1': 0.5889145496535796,
'p': 0.42535446205170974,
'r': 0.9568480300187617},
{'a': 0.6983758700696056,
'f1': 0.6980255516840884,
'p': 0.5483576642335767,
'r': 0.9600638977635783}],
'news.bbc.co.uk;2010': [{'a': 0.6463865546218487,
'f1': 0.6277423920736023,
'p': 0.4887052341597796,
'r': 0.8773491592482691},
{'a': 0.5993363749481543,
'f1': 0.4548532731376975,
'p': 0.31191950464396284,
'r': 0.8395833333333333},
{'a': 0.5348376880443388,
'f1': 0.3446737311767986,
'p': 0.22055674518201285,
'r': 0.7882653061224489},
{'a': 0.5891348088531188,
'f1': 0.4495956873315363,
'p': 0.3068432671081678,
'r': 0.8407258064516129},
{'a': 0.5521965044874823,
'f1': 0.21131447587354413,
'p': 0.12270531400966184,
'r': 0.7604790419161677},
{'a': 0.5797891675754271,
'f1': 0.48894783377541995,
'p': 0.345625,
'r': 0.8353474320241692},
{'a': 0.5684678016284234,
'f1': 0.15138282387190685,
'p': 0.0840064620355412,
'r': 0.7647058823529411},
{'a': 0.5637526652452025,
'f1': 0.3529411764705882,
'p': 0.22518159806295399,
'r': 0.8157894736842105},
{'a': 0.5631832094446874,
'f1': 0.32727272727272727,
'p': 0.2059322033898305,
'r': 0.7967213114754098},
{'a': 0.6450417052826691,
'f1': 0.6456984273820536,
'p': 0.5127326150832517,
'r': 0.8717735220649458},
{'a': 0.5781179648806843,
'f1': 0.3349893541518808,
'p': 0.21338155515370705,
'r': 0.7788778877887789},
{'a': 0.6214645486245641,
'f1': 0.5340963280877444,
'p': 0.3905160390516039,
'r': 0.8446455505279035},
{'a': 0.5641025641025641,
'f1': 0.35361216730038025,
'p': 0.22794117647058823,
'r': 0.788135593220339},
{'a': 0.5724227925184863,
'f1': 0.3596091205211726,
'p': 0.23310810810810811,
'r': 0.7863247863247863},
{'a': 0.580895008605852,
'f1': 0.2282091917591125,
'p': 0.133086876155268,
'r': 0.8},
{'a': 0.6039739636861939,
'f1': 0.5553846153846154,
'p': 0.41116173120728927,
'r': 0.8554502369668247},
{'a': 0.601304347826087,
'f1': 0.4272329793878826,
'p': 0.2878787878787879,
'r': 0.8280871670702179},
{'a': 0.5504488330341113,
'f1': 0.44057193923145677,
'p': 0.2969879518072289,
'r': 0.8529411764705882},
{'a': 0.5788510793999269,
'f1': 0.4812978819287967,
'p': 0.34099616858237547,
'r': 0.8177641653905053},
{'a': 0.6444597859855022,
'f1': 0.616530156366344,
'p': 0.4780600461893764,
'r': 0.8679245283018868},
{'a': 0.6652514696276943,
'f1': 0.6598075008297378,
'p': 0.5273209549071618,
'r': 0.8812056737588653},
{'a': 0.5748065348237317,
'f1': 0.3736542115262824,
'p': 0.24200164068908941,
'r': 0.8194444444444444},
{'a': 0.6365217391304347,
'f1': 0.6012972148035101,
'p': 0.46271285965942455,
'r': 0.8583877995642701},
{'a': 0.5708220764183712,
'f1': 0.44232698094282846,
'p': 0.30040871934604907,
'r': 0.8384030418250951},
{'a': 0.56571671629094,
'f1': 0.3622735790131168,
'p': 0.2333065164923572,
'r': 0.8100558659217877}],
'news.bbc.co.uk;2015': [{'a': 0.49690869877785765,
'f1': 0.21984392419175025,
'p': 0.12615148413510746,
'r': 0.854419410745234},
{'a': 0.4873889366580682,
'f1': 0.18649078917443712,
'p': 0.10429916051895192,
'r': 0.8798283261802575},
{'a': 0.5163737514224301,
'f1': 0.3583291394061399,
'p': 0.22343096234309623,
'r': 0.9043183742591024},
{'a': 0.4793890865954923,
'f1': 0.12770186335403727,
'p': 0.06934700485698866,
'r': 0.8056426332288401},
{'a': 0.5246474143720617,
'f1': 0.3459619294030678,
'p': 0.21418764302059495,
'r': 0.899135446685879},
{'a': 0.4652952493710226,
'f1': 0.1642377978255841,
'p': 0.09090909090909091,
'r': 0.8492822966507177},
{'a': 0.48122332859174966,
'f1': 0.18393376594316405,
'p': 0.10303334168964653,
'r': 0.85625},
{'a': 0.48684939091915835,
'f1': 0.22722534917656864,
'p': 0.13078953683705302,
'r': 0.8650793650793651},
{'a': 0.4801213960546282,
'f1': 0.10220125786163523,
'p': 0.0546218487394958,
'r': 0.7926829268292683},
{'a': 0.469634388863393,
'f1': 0.15211543164587918,
'p': 0.0835347837765243,
'r': 0.8497267759562842},
{'a': 0.44771994472593274,
'f1': 0.0596078431372549,
'p': 0.031054208662489785,
'r': 0.7402597402597403},
{'a': 0.4550916191048363,
'f1': 0.07164790174002048,
'p': 0.037614185921547555,
'r': 0.7526881720430108},
{'a': 0.49805708160257267,
'f1': 0.272621359223301,
'p': 0.1610091743119266,
'r': 0.8886075949367088},
{'a': 0.4813125367863449,
'f1': 0.14379402477532183,
'p': 0.07855626326963906,
'r': 0.8481375358166189},
{'a': 0.4837876614060258,
'f1': 0.17853881278538813,
'p': 0.09961783439490446,
'r': 0.8593406593406593},
{'a': 0.47725610647945005,
'f1': 0.1350435624394966,
'p': 0.07344037904711766,
'r': 0.8378378378378378},
{'a': 0.48186820939748837,
'f1': 0.2317991631799163,
'p': 0.13384875573810098,
'r': 0.8642745709828393},
{'a': 0.5041983206717313,
'f1': 0.2927756653992396,
'p': 0.1751990898748578,
'r': 0.8901734104046243},
{'a': 0.5088987764182424,
'f1': 0.27918367346938777,
'p': 0.16561743341404359,
'r': 0.8883116883116883},
{'a': 0.4595739219712526,
'f1': 0.21917300203968107,
'p': 0.12547770700636943,
'r': 0.8653001464128843},
{'a': 0.4967490247074122,
'f1': 0.21712744436952125,
'p': 0.12403697996918336,
'r': 0.8702702702702703},
{'a': 0.47959183673469385,
'f1': 0.20647603027754416,
'p': 0.1171558100691959,
'r': 0.8690265486725663},
{'a': 0.5118998486724446,
'f1': 0.30184966548602915,
'p': 0.18132387706855793,
'r': 0.9002347417840375},
{'a': 0.49269399914881545,
'f1': 0.2256388046773495,
'p': 0.12960199004975123,
'r': 0.8712374581939799},
{'a': 0.4387309980171844,
'f1': 0.13978930307941653,
'p': 0.07614213197969544,
'r': 0.8518518518518519}],
'news.yahoo.com;2000': [{'a': 0.8360881542699724,
'f1': 0.8627450980392156,
'p': 0.7965921192758253,
'r': 0.9408805031446541},
{'a': 0.8471290082028337,
'f1': 0.8353413654618473,
'p': 0.7987711213517665,
'r': 0.8754208754208754},
{'a': 0.8091674462114126,
'f1': 0.8089887640449437,
'p': 0.7128712871287128,
'r': 0.935064935064935},
{'a': 0.7725366876310272,
'f1': 0.741973840665874,
'p': 0.6202783300198808,
'r': 0.9230769230769231},
{'a': 0.7598086124401914,
'f1': 0.7584215591915302,
'p': 0.6365105008077544,
'r': 0.9380952380952381},
{'a': 0.7884250474383302,
'f1': 0.7824390243902438,
'p': 0.6843003412969283,
'r': 0.9134396355353075},
{'a': 0.763216348289649,
'f1': 0.8077893977641543,
'p': 0.8339538346984363,
'r': 0.7832167832167832},
{'a': 0.7994780339277947,
'f1': 0.8209708737864078,
'p': 0.7620764239365537,
'r': 0.8897306397306397},
{'a': 0.7989159891598916,
'f1': 0.7980402830702231,
'p': 0.7315369261477046,
'r': 0.8778443113772455},
{'a': 0.8038251366120218,
'f1': 0.7886992348440259,
'p': 0.7173447537473233,
'r': 0.8758169934640523},
{'a': 0.72895467160037,
'f1': 0.725912067352666,
'p': 0.5932721712538226,
'r': 0.9349397590361446},
{'a': 0.8344827586206897,
'f1': 0.8712195121951221,
'p': 0.7973214285714286,
'r': 0.9602150537634409},
{'a': 0.7567049808429118,
'f1': 0.7158836689038031,
'p': 0.5947955390334573,
'r': 0.898876404494382},
{'a': 0.7905944986690329,
'f1': 0.7947826086956522,
'p': 0.6903323262839879,
'r': 0.9364754098360656},
{'a': 0.8144030932817786,
'f1': 0.8698305084745762,
'p': 0.7808886183810103,
'r': 0.981637337413925},
{'a': 0.7850055126791621,
'f1': 0.7866520787746171,
'p': 0.7233400402414487,
'r': 0.8621103117505995},
{'a': 0.7755102040816326,
'f1': 0.7236180904522613,
'p': 0.6075949367088608,
'r': 0.8944099378881988},
{'a': 0.7421236872812136,
'f1': 0.6530612244897959,
'p': 0.5135802469135803,
'r': 0.896551724137931},
{'a': 0.791907514450867,
'f1': 0.782258064516129,
'p': 0.6724436741767764,
'r': 0.9349397590361446},
{'a': 0.7447193723596862,
'f1': 0.6923636363636364,
'p': 0.5927770859277709,
'r': 0.8321678321678322},
{'a': 0.7362068965517241,
'f1': 0.5048543689320389,
'p': 0.3880597014925373,
'r': 0.7222222222222222},
{'a': 0.7826446280991736,
'f1': 0.779547359597653,
'p': 0.6808199121522694,
'r': 0.9117647058823529},
{'a': 0.7142857142857143,
'f1': 0.6615384615384615,
'p': 0.5139442231075697,
'r': 0.9280575539568345},
{'a': 0.8239700374531835,
'f1': 0.8502230720203952,
'p': 0.7657864523536165,
'r': 0.9555873925501432},
{'a': 0.9327354260089686,
'f1': 0.9419054996127032,
'p': 0.9170437405731523,
'r': 0.9681528662420382}],
'news.yahoo.com;2005': [{'a': 0.6400189214758751,
'f1': 0.6712742980561553,
'p': 0.5155938951559389,
'r': 0.9616336633663366},
{'a': 0.620010643959553,
'f1': 0.6106870229007633,
'p': 0.449438202247191,
'r': 0.9523809523809523},
{'a': 0.6973929236499069,
'f1': 0.7333880229696473,
'p': 0.5971943887775552,
'r': 0.9500531349628055},
{'a': 0.6673567977915804,
'f1': 0.6036184210526315,
'p': 0.44920440636474906,
'r': 0.9197994987468672},
{'a': 0.6273584905660378,
'f1': 0.45391705069124416,
'p': 0.3044822256568779,
'r': 0.8914027149321267},
{'a': 0.7228490832157969,
'f1': 0.687847498014297,
'p': 0.54125,
'r': 0.9433551198257081},
{'a': 0.577020202020202,
'f1': 0.5044378698224852,
'p': 0.345841784989858,
'r': 0.9316939890710383},
{'a': 0.6563658838071693,
'f1': 0.6268456375838927,
'p': 0.467,
'r': 0.9530612244897959},
{'a': 0.8858267716535433,
'f1': 0.8417770849571318,
'p': 0.7906295754026355,
'r': 0.9},
{'a': 0.703672075149445,
'f1': 0.7510760401721664,
'p': 0.6166077738515902,
'r': 0.9605504587155963},
{'a': 0.8256555634301913,
'f1': 0.00806451612903226,
'p': 1.0,
'r': 0.004048582995951417},
{'a': 0.6662341032961329,
'f1': 0.695837275307474,
'p': 0.5464338781575037,
'r': 0.9576822916666666},
{'a': 0.8917779339423753,
'f1': 0.8917018284106891,
'p': 0.8470273881095525,
'r': 0.941351150705271},
{'a': 0.7445407279029462,
'f1': 0.8122292993630573,
'p': 0.6960698689956332,
'r': 0.9749235474006116},
{'a': 0.7778155835318136,
'f1': 0.8401468788249694,
'p': 0.7374301675977654,
'r': 0.9761092150170648},
{'a': 0.6752,
'f1': 0.7280643000669792,
'p': 0.5844086021505376,
'r': 0.9653641207815276},
{'a': 0.5894378194207837,
'f1': 0.3486486486486487,
'p': 0.21717171717171718,
'r': 0.8835616438356164},
{'a': 0.6485182632667126,
'f1': 0.5707070707070707,
'p': 0.41748768472906406,
'r': 0.901595744680851},
{'a': 0.776536312849162,
'f1': 0.7995991983967936,
'p': 0.6843910806174958,
'r': 0.9614457831325302},
{'a': 0.7656765676567657,
'f1': 0.8039447731755425,
'p': 0.6866576819407008,
'r': 0.9695528068506185},
{'a': 0.541927409261577,
'f1': 0.436055469953775,
'p': 0.28643724696356276,
'r': 0.9129032258064517},
{'a': 0.62841796875,
'f1': 0.6497929130234699,
'p': 0.4937062937062937,
'r': 0.9502018842530283},
{'a': 0.7472941176470588,
'f1': 0.7868201667328304,
'p': 0.6651006711409396,
'r': 0.9630709426627794},
{'a': 0.5521594684385382,
'f1': 0.43456375838926176,
'p': 0.2861878453038674,
'r': 0.9024390243902439},
{'a': 0.7140263277349069,
'f1': 0.7552447552447552,
'p': 0.6183206106870229,
'r': 0.9700598802395209}],
'news.yahoo.com;2010': [{'a': 0.546796256299496,
'f1': 0.5574692442882251,
'p': 0.3996975806451613,
'r': 0.9210220673635308},
{'a': 0.45284825275251317,
'f1': 0.2742857142857143,
'p': 0.16463414634146342,
'r': 0.8212927756653993},
{'a': 0.5024,
'f1': 0.4514991181657848,
'p': 0.30313795145056244,
'r': 0.8842832469775475},
{'a': 0.5352635486265775,
'f1': 0.53003003003003,
'p': 0.3753322700691122,
'r': 0.9016602809706258},
{'a': 0.5820795720494818,
'f1': 0.6186699206833435,
'p': 0.4632252169940612,
'r': 0.931129476584022},
{'a': 0.4790552584670232,
'f1': 0.37852206273258904,
'p': 0.24119241192411925,
'r': 0.8790123456790123},
{'a': 0.5543165467625899,
'f1': 0.5632710609799084,
'p': 0.4059959349593496,
'r': 0.9194476409666283},
{'a': 0.4846698113207547,
'f1': 0.43467011642949555,
'p': 0.2868525896414343,
'r': 0.896797153024911},
{'a': 0.5812964930924548,
'f1': 0.6025554808338938,
'p': 0.4471057884231537,
'r': 0.9237113402061856},
{'a': 0.45057766367137353,
'f1': 0.33471502590673574,
'p': 0.20838709677419354,
'r': 0.85},
{'a': 0.41866913123844735,
'f1': 0.21960297766749376,
'p': 0.12642857142857142,
'r': 0.8349056603773585},
{'a': 0.4810679611650485,
'f1': 0.26930963773069033,
'p': 0.16121112929623568,
'r': 0.8174273858921162},
{'a': 0.637403488704604,
'f1': 0.702626641651032,
'p': 0.5614692653673163,
'r': 0.9385964912280702},
{'a': 0.43103448275862066,
'f1': 0.19624217118997914,
'p': 0.11146245059288537,
'r': 0.8197674418604651},
{'a': 0.5563783237967015,
'f1': 0.5826472450918303,
'p': 0.42494226327944573,
'r': 0.9264853977844915},
{'a': 0.5243069505825633,
'f1': 0.49184549356223173,
'p': 0.338452451269935,
'r': 0.8995290423861853},
{'a': 0.5063239494084047,
'f1': 0.4636524822695035,
'p': 0.31149493746277546,
'r': 0.9064124783362218},
{'a': 0.48669991687448044,
'f1': 0.41827602449364104,
'p': 0.2733990147783251,
'r': 0.8897795591182365},
{'a': 0.5035657686212361,
'f1': 0.46153846153846156,
'p': 0.31257275902211873,
'r': 0.8817733990147784},
{'a': 0.5185332801913113,
'f1': 0.485080988917306,
'p': 0.3329432416617905,
'r': 0.8932496075353218},
{'a': 0.50093388121031,
'f1': 0.4781250000000001,
'p': 0.32501327668613916,
'r': 0.9039881831610044},
{'a': 0.5666204024982651,
'f1': 0.5911620294599017,
'p': 0.43686502177068215,
'r': 0.9139676113360324},
{'a': 0.5485294117647059,
'f1': 0.5514974433893353,
'p': 0.39590980597797587,
'r': 0.9085439229843562},
{'a': 0.5541579315164221,
'f1': 0.5743829219479653,
'p': 0.4155405405405405,
'r': 0.9298056155507559},
{'a': 0.4371970030850595,
'f1': 0.2837913628715648,
'p': 0.17048517520215634,
'r': 0.8461538461538461}],
'news.yahoo.com;2015': [{'a': 0.2978950537869871,
'f1': 0.03535163595336593,
'p': 0.018125723100655612,
'r': 0.7121212121212122},
{'a': 0.3406326683025712,
'f1': 0.024851090686757917,
'p': 0.012654159803960953,
'r': 0.6877729257641921},
{'a': 0.2534767320302848,
'f1': 0.01873789844059045,
'p': 0.009495073112062963,
'r': 0.7053291536050157},
{'a': 0.33942259830761573,
'f1': 0.04785478547854786,
'p': 0.024728432135839543,
'r': 0.7386489479512736},
{'a': 0.38908852295097335,
'f1': 0.026490066225165563,
'p': 0.013495687468290208,
'r': 0.7131367292225201},
{'a': 0.325479181295675,
'f1': 0.0651672433679354,
'p': 0.03402973837756446,
'r': 0.7667514843087362},
{'a': 0.3467218840435086,
'f1': 0.052536362978507854,
'p': 0.027250206440957887,
'r': 0.7289156626506024},
{'a': 0.3453233648768756,
'f1': 0.055727320714878216,
'p': 0.028872826723969524,
'r': 0.7971952535059331},
{'a': 0.26059209628765023,
'f1': 0.03421375678137251,
'p': 0.017491294841687585,
'r': 0.7783783783783784},
{'a': 0.39315189315189314,
'f1': 0.028932909924895407,
'p': 0.014762615091816264,
'r': 0.7211055276381909},
{'a': 0.29253921490564533,
'f1': 0.008702443378333148,
'p': 0.0043802178877615965,
'r': 0.6573033707865169},
{'a': 0.33761166192205755,
'f1': 0.03620782003213712,
'p': 0.018589132507149667,
'r': 0.6935704514363885},
{'a': 0.3593414097473381,
'f1': 0.04860093896713615,
'p': 0.025104764861089555,
'r': 0.7584994138335287},
{'a': 0.328121298270552,
'f1': 0.04296962879640045,
'p': 0.02217063261752757,
'r': 0.6945454545454546},
{'a': 0.2474844413424068,
'f1': 0.02926170468187275,
'p': 0.014928515378284752,
'r': 0.7337723424270931},
{'a': 0.3322700691121744,
'f1': 0.022768222768222768,
'p': 0.011576580328141918,
'r': 0.6847290640394089},
{'a': 0.3376675235219247,
'f1': 0.03378599774274584,
'p': 0.017321835218576175,
'r': 0.6823529411764706},
{'a': 0.25410994091959926,
'f1': 0.013294822240156313,
'p': 0.006718900933776967,
'r': 0.624750499001996},
{'a': 0.3277816795424902,
'f1': 0.03388608507570296,
'p': 0.017388730622664546,
'r': 0.6610407876230661},
{'a': 0.3458661866490886,
'f1': 0.041887927513089986,
'p': 0.021551394726786396,
'r': 0.7430830039525692},
{'a': 0.3264702162640698,
'f1': 0.018503501658680428,
'p': 0.009392306540936986,
'r': 0.6182266009852216},
{'a': 0.2662884452825281,
'f1': 0.024719635255731544,
'p': 0.012570599740676439,
'r': 0.7371134020618557},
{'a': 0.3410535876475931,
'f1': 0.07040496341504535,
'p': 0.03690349946977731,
'r': 0.7637161667885881},
{'a': 0.34464260505387984,
'f1': 0.06391988170453018,
'p': 0.03338950916368232,
'r': 0.7464678178963893},
{'a': 0.2548574825568662,
'f1': 0.01092260730527234,
'p': 0.005511893534527927,
'r': 0.5950226244343891}],
'thenation.com;2000': [{'a': 0.9352170916609235,
'f1': 0.9546332046332046,
'p': 0.935666982024598,
'r': 0.9743842364532019},
{'a': 0.8833461243284727,
'f1': 0.9100591715976332,
'p': 0.8621076233183856,
'r': 0.9636591478696742},
{'a': 0.8194915254237288,
'f1': 0.84796573875803,
'p': 0.7547649301143583,
'r': 0.9674267100977199},
{'a': 0.9183376925312409,
'f1': 0.9519575995896734,
'p': 0.9227709645343056,
'r': 0.9830508474576272},
{'a': 0.7515225334957369,
'f1': 0.8073654390934845,
'p': 0.692868719611021,
'r': 0.9671945701357466},
{'a': 0.8347597103357473,
'f1': 0.8771414586392561,
'p': 0.7978628673196795,
'r': 0.9739130434782609},
{'a': 0.9278169014084507,
'f1': 0.9586514911396051,
'p': 0.9293296089385474,
'r': 0.9898839631062184},
{'a': 0.9260474507824331,
'f1': 0.9575669804489502,
'p': 0.9270891755468312,
'r': 0.9901168014375562},
{'a': 0.8884520884520885,
'f1': 0.9266558966074314,
'p': 0.8770642201834863,
'r': 0.9821917808219178},
{'a': 0.8868229587712207,
'f1': 0.911504424778761,
'p': 0.8532544378698225,
'r': 0.9782903663500678},
{'a': 0.8986725663716815,
'f1': 0.9356922212861555,
'p': 0.8918629550321199,
'r': 0.9840519787359716},
{'a': 0.7913333333333333,
'f1': 0.8400613183444048,
'p': 0.7358997314234557,
'r': 0.9785714285714285},
{'a': 0.92643391521197,
'f1': 0.8747346072186836,
'p': 0.8841201716738197,
'r': 0.865546218487395},
{'a': 0.9190821256038647,
'f1': 0.8629856850715746,
'p': 0.8373015873015873,
'r': 0.890295358649789},
{'a': 0.9169349645846748,
'f1': 0.9421783953384133,
'p': 0.9013722126929674,
'r': 0.9868544600938968},
{'a': 0.8360323886639676,
'f1': 0.8770864946889226,
'p': 0.7932296431838975,
'r': 0.9807692307692307},
{'a': 0.91296180338134,
'f1': 0.9398007795582504,
'p': 0.9011627906976745,
'r': 0.9819004524886877},
{'a': 0.9568313306631063,
'f1': 0.9734027968193035,
'p': 0.9657236126224157,
'r': 0.9812050856826976},
{'a': 0.8551495016611296,
'f1': 0.8935546875,
'p': 0.8243243243243243,
'r': 0.9754797441364605},
{'a': 0.8858299595141701,
'f1': 0.9108159392789374,
'p': 0.8520710059171598,
'r': 0.9782608695652174},
{'a': 0.9500562429696288,
'f1': 0.9717197452229299,
'p': 0.9563691073219659,
'r': 0.9875712066286898},
{'a': 0.9602003162888771,
'f1': 0.978648190045249,
'p': 0.9647337608028994,
'r': 0.9929698708751793},
{'a': 0.9270178830352828,
'f1': 0.9537802265074992,
'p': 0.9246290801186944,
'r': 0.9848293299620733},
{'a': 0.8469197261978842,
'f1': 0.8902765388046388,
'p': 0.8207236842105263,
'r': 0.9727095516569201},
{'a': 0.891358024691358,
'f1': 0.9246575342465754,
'p': 0.8773354995938262,
'r': 0.9773755656108597}],
'thenation.com;2005': [{'a': 0.7422003284072249,
'f1': 0.8379772961816306,
'p': 0.7308730873087309,
'r': 0.9818621523579202},
{'a': 0.515638963360143,
'f1': 0.6215083798882681,
'p': 0.4587628865979381,
'r': 0.9632034632034632},
{'a': 0.7669902912621359,
'f1': 0.8535394629780308,
'p': 0.7568542568542569,
'r': 0.9785447761194029},
{'a': 0.84376427592508,
'f1': 0.9064551422319476,
'p': 0.8436863543788188,
'r': 0.9793144208037825},
{'a': 0.5734919286321155,
'f1': 0.6718954248366014,
'p': 0.5207700101317123,
'r': 0.9465930018416207},
{'a': 0.908300395256917,
'f1': 0.9250645994832042,
'p': 0.8883374689826302,
'r': 0.9649595687331537},
{'a': 0.7106274007682458,
'f1': 0.803135888501742,
'p': 0.6844840386043058,
'r': 0.9715489989462592},
{'a': 0.8013289036544851,
'f1': 0.8441896821261075,
'p': 0.763430725730443,
'r': 0.9440559440559441},
{'a': 0.7294654498044328,
'f1': 0.8179025888547609,
'p': 0.7071320182094082,
'r': 0.9698231009365245},
{'a': 0.7561349693251533,
'f1': 0.8175908221797323,
'p': 0.7079470198675497,
'r': 0.967420814479638},
{'a': 0.7597571552471812,
'f1': 0.8435911914172783,
'p': 0.7455089820359282,
'r': 0.9713914174252276},
{'a': 0.6508944027697634,
'f1': 0.766859344894027,
'p': 0.6313451776649747,
'r': 0.9764474975466143},
{'a': 0.8038277511961722,
'f1': 0.8547815820543093,
'p': 0.7653276955602537,
'r': 0.9679144385026738},
{'a': 0.700948212983224,
'f1': 0.8056872037914693,
'p': 0.6805444355484388,
'r': 0.9872241579558653},
{'a': 0.7948051948051948,
'f1': 0.8736,
'p': 0.7913043478260869,
'r': 0.975},
{'a': 0.9071925754060325,
'f1': 0.9481058640373639,
'p': 0.9116766467065869,
'r': 0.9875675675675676},
{'a': 0.8120300751879699,
'f1': 0.842964824120603,
'p': 0.7472160356347439,
'r': 0.9668587896253602},
{'a': 0.8900118906064209,
'f1': 0.9175211769950958,
'p': 0.8705583756345178,
'r': 0.9698397737983034},
{'a': 0.780285929270128,
'f1': 0.8108808290155439,
'p': 0.705749718151071,
'r': 0.9528158295281582},
{'a': 0.7981283422459893,
'f1': 0.8356909684439608,
'p': 0.7370441458733206,
'r': 0.964824120603015},
{'a': 0.7818831942789034,
'f1': 0.827683615819209,
'p': 0.7288557213930348,
'r': 0.9575163398692811},
{'a': 0.8144391408114559,
'f1': 0.859466787166742,
'p': 0.7712895377128953,
'r': 0.9704081632653061},
{'a': 0.7480569948186528,
'f1': 0.7925333333333334,
'p': 0.6705776173285198,
'r': 0.9687092568448501},
{'a': 0.7820372398685652,
'f1': 0.8351284175642087,
'p': 0.7309644670050761,
'r': 0.9739130434782609},
{'a': 0.7299546142208775,
'f1': 0.8238776517020227,
'p': 0.7142857142857143,
'r': 0.9731934731934732}],
'thenation.com;2010': [{'a': 0.5942393736017897,
'f1': 0.5807570066454781,
'p': 0.42015050167224083,
'r': 0.9401309635173059},
{'a': 0.618707015130674,
'f1': 0.5925925925925926,
'p': 0.4388332607749238,
'r': 0.9122171945701357},
{'a': 0.5761209593326382,
'f1': 0.5659369994660971,
'p': 0.40535372848948376,
'r': 0.9372236958443855},
{'a': 0.6153171587255188,
'f1': 0.587719298245614,
'p': 0.4275296262534184,
'r': 0.9398797595190381},
{'a': 0.6742620403935785,
'f1': 0.6818411734951948,
'p': 0.53812375249501,
'r': 0.9302967563837129},
{'a': 0.7122785583384239,
'f1': 0.36092265943012214,
'p': 0.23211169284467714,
'r': 0.8109756097560976},
{'a': 0.5451298701298701,
'f1': 0.43210376976084314,
'p': 0.2838125665601704,
'r': 0.9049235993208828},
{'a': 0.39684985447697313,
'f1': 0.3956081660662207,
'p': 0.25016272510305926,
'r': 0.9450819672131148},
{'a': 0.8165793234873749,
'f1': 0.8574074074074074,
'p': 0.7685922974767596,
'r': 0.9694304857621441},
{'a': 0.7490712597095576,
'f1': 0.7255264130033248,
'p': 0.6031941031941032,
'r': 0.9101019462465245},
{'a': 0.6472749540722597,
'f1': 0.5952213633169361,
'p': 0.44508670520231214,
'r': 0.8981972428419936},
{'a': 0.6501617317740731,
'f1': 0.6560665362035225,
'p': 0.5093049753133309,
'r': 0.9216494845360824},
{'a': 0.5658914728682171,
'f1': 0.6038240917782026,
'p': 0.4395879732739421,
'r': 0.963980463980464},
{'a': 0.6216008209338122,
'f1': 0.5685873062298917,
'p': 0.4259421560035057,
'r': 0.8548812664907651},
{'a': 0.615167659041053,
'f1': 0.5759668508287292,
'p': 0.4163754368447329,
'r': 0.9339305711086227},
{'a': 0.5034880923743084,
'f1': 0.46858908341915545,
'p': 0.3133608815426997,
'r': 0.9285714285714286},
{'a': 0.6667798913043478,
'f1': 0.6332710280373831,
'p': 0.4766460326392797,
'r': 0.9432071269487751},
{'a': 0.7145852051512429,
'f1': 0.7000314762354422,
'p': 0.5653279105236401,
'r': 0.9190082644628099},
{'a': 0.3586794462193823,
'f1': 0.40305313243457574,
'p': 0.2569839464037416,
'r': 0.9338539274230593},
{'a': 0.5134237475781899,
'f1': 0.4008179959100205,
'p': 0.25983208130799823,
'r': 0.8763040238450075},
{'a': 0.47105872447905706,
'f1': 0.4336263240928555,
'p': 0.28394332939787487,
'r': 0.9170638703527169},
{'a': 0.7478336221837089,
'f1': 0.7487769784172662,
'p': 0.6260827718960539,
'r': 0.9312813171080888},
{'a': 0.5436736575247753,
'f1': 0.5242671792407496,
'p': 0.3668459986550101,
'r': 0.9183501683501684},
{'a': 0.7528369775809577,
'f1': 0.74978985710283,
'p': 0.6261113710809546,
'r': 0.9343575418994413},
{'a': 0.6234670202187603,
'f1': 0.5613899613899614,
'p': 0.4045631608235949,
'r': 0.9167717528373266}],
'thenation.com;2015': [{'a': 0.6683217477656405,
'f1': 0.7321571772253408,
'p': 0.6024414384691521,
'r': 0.9330608073582013},
{'a': 0.5179186767746382,
'f1': 0.5197391005835909,
'p': 0.3609918931807344,
'r': 0.9276960784313726},
{'a': 0.569672131147541,
'f1': 0.6094420600858369,
'p': 0.44879898862199746,
'r': 0.9491978609625669},
{'a': 0.7424322889006904,
'f1': 0.8250991705733862,
'p': 0.7194968553459119,
'r': 0.967032967032967},
{'a': 0.6103041929295697,
'f1': 0.6750457038391225,
'p': 0.5211714890613973,
'r': 0.9578469520103762},
{'a': 0.6079578700994733,
'f1': 0.6445623342175065,
'p': 0.5022736668044646,
'r': 0.8993338267949667},
{'a': 0.609662122110255,
'f1': 0.6414375170160631,
'p': 0.4970464135021097,
'r': 0.9040675364543361},
{'a': 0.45186399016796397,
'f1': 0.2821888412017167,
'p': 0.17200784826684107,
'r': 0.7850746268656716},
{'a': 0.5419605077574048,
'f1': 0.521194249907851,
'p': 0.3691906005221932,
'r': 0.8859649122807017},
{'a': 0.6588055130168453,
'f1': 0.6628329297820823,
'p': 0.5284749034749034,
'r': 0.8887987012987013},
{'a': 0.43117593436645396,
'f1': 0.13453536754507628,
'p': 0.07461538461538461,
'r': 0.6830985915492958},
{'a': 0.539161520961976,
'f1': 0.5577043044291953,
'p': 0.3983957219251337,
'r': 0.9293139293139293},
{'a': 0.6002372479240806,
'f1': 0.6304824561403509,
'p': 0.4872881355932203,
'r': 0.8928571428571429},
{'a': 0.6015408320493066,
'f1': 0.6298310907529344,
'p': 0.48013967699694454,
'r': 0.9151414309484193},
{'a': 0.6577736890524379,
'f1': 0.6618181818181819,
'p': 0.5255052935514918,
'r': 0.8936170212765957},
{'a': 0.45345963756177926,
'f1': 0.27287671232876715,
'p': 0.16700201207243462,
'r': 0.7455089820359282},
{'a': 0.6854491966508259,
'f1': 0.7674807627969221,
'p': 0.6352810855718638,
'r': 0.9691592733417829},
{'a': 0.7682108626198083,
'f1': 0.848743875742729,
'p': 0.7495857116553121,
'r': 0.9781355117731859},
{'a': 0.6928146258503401,
'f1': 0.7743950039032006,
'p': 0.6505771248688352,
'r': 0.956421133821828},
{'a': 0.7130681818181818,
'f1': 0.7978652434956638,
'p': 0.678928247048138,
'r': 0.9673244904561631},
{'a': 0.5623215984776403,
'f1': 0.5953079178885631,
'p': 0.4371231696813092,
'r': 0.9329044117647058},
{'a': 0.6189645142524723,
'f1': 0.6685222672064777,
'p': 0.5215159889459139,
'r': 0.930937279774489},
{'a': 0.7196644557159578,
'f1': 0.80301576422207,
'p': 0.68562734082397,
'r': 0.968905061197486},
{'a': 0.45239139973672665,
'f1': 0.21012658227848102,
'p': 0.12332838038632987,
'r': 0.7094017094017094},
{'a': 0.5498090940645609,
'f1': 0.5295611171563293,
'p': 0.37823834196891193,
'r': 0.8827085852478839}],
'www.cnn.com;2000': [{'a': 0.7958280657395702,
'f1': 0.7174103237095364,
'p': 0.6539074960127592,
'r': 0.7945736434108527},
{'a': 0.7127397934087555,
'f1': 0.7708006279434853,
'p': 0.6680272108843538,
'r': 0.9109461966604824},
{'a': 0.9567706842255941,
'f1': 0.9742628259757967,
'p': 0.9855172413793103,
'r': 0.963262554769127},
{'a': 0.7299226650803093,
'f1': 0.6031468531468531,
'p': 0.47983310152990266,
'r': 0.8117647058823529},
{'a': 0.8297587131367292,
'f1': 0.7053364269141531,
'p': 0.6178861788617886,
'r': 0.8216216216216217},
{'a': 0.8420512820512821,
'f1': 0.8724461623412479,
'p': 0.8254963427377221,
'r': 0.9250585480093677},
{'a': 0.7323651452282157,
'f1': 0.7654545454545455,
'p': 0.6345139412207988,
'r': 0.9644902634593356},
{'a': 0.7943116240725474,
'f1': 0.804696673189824,
'p': 0.7363896848137536,
'r': 0.8869715271786023},
{'a': 0.7798994974874371,
'f1': 0.7751540041067762,
'p': 0.6807935076645627,
'r': 0.899880810488677},
{'a': 0.8594401429422275,
'f1': 0.8311874105865522,
'p': 0.8035961272475796,
'r': 0.8607407407407407},
{'a': 0.759375,
'f1': 0.7528656579550665,
'p': 0.650554675118859,
'r': 0.8933623503808488},
{'a': 0.823240589198036,
'f1': 0.8175675675675675,
'p': 0.7531120331950207,
'r': 0.8940886699507389},
{'a': 0.757201646090535,
'f1': 0.7139393939393939,
'p': 0.605344295991778,
'r': 0.8700147710487445},
{'a': 0.754726368159204,
'f1': 0.7570231641202563,
'p': 0.6689895470383276,
'r': 0.8717366628830874},
{'a': 0.880184331797235,
'f1': 0.8990291262135922,
'p': 0.8852772466539197,
'r': 0.9132149901380671},
{'a': 0.7287844036697247,
'f1': 0.6260869565217392,
'p': 0.49936948297604034,
'r': 0.8389830508474576},
{'a': 0.7060931899641577,
'f1': 0.7139534883720929,
'p': 0.5690454124189064,
'r': 0.9578783151326054},
{'a': 0.7039517749497656,
'f1': 0.440506329113924,
'p': 0.30472854640980734,
'r': 0.7945205479452054},
{'a': 0.805125467164976,
'f1': 0.785167745732784,
'p': 0.702845100105374,
'r': 0.8893333333333333},
{'a': 0.8487865588052271,
'f1': 0.7993393889347646,
'p': 0.7598116169544741,
'r': 0.8432055749128919},
{'a': 0.8317631224764468,
'f1': 0.7093023255813954,
'p': 0.613682092555332,
'r': 0.8402203856749312},
{'a': 0.8064516129032258,
'f1': 0.8432835820895522,
'p': 0.7609427609427609,
'r': 0.9456066945606695},
{'a': 0.8111964873765093,
'f1': 0.8605028386050284,
'p': 0.772197962154294,
'r': 0.9716117216117216},
{'a': 0.7410106899902819,
'f1': 0.8017113095238095,
'p': 0.70632579482137,
'r': 0.9268817204301075},
{'a': 0.6913158892681077,
'f1': 0.700515084621045,
'p': 0.5815516188149054,
'r': 0.8806660499537465}],
'www.cnn.com;2005': [{'a': 0.7191574724172518,
'f1': 0.5953757225433526,
'p': 0.45474613686534215,
'r': 0.8619246861924686},
{'a': 0.7776689520078355,
'f1': 0.6523736600306278,
'p': 0.5285359801488834,
'r': 0.852},
{'a': 0.8085485307212823,
'f1': 0.7873392680514342,
'p': 0.6951965065502184,
'r': 0.9076396807297605},
{'a': 0.7859744990892532,
'f1': 0.7134146341463414,
'p': 0.6087408949011447,
'r': 0.8615611192930781},
{'a': 0.7874865156418555,
'f1': 0.602020202020202,
'p': 0.4790996784565916,
'r': 0.8097826086956522},
{'a': 0.822429906542056,
'f1': 0.7831265508684863,
'p': 0.714027149321267,
'r': 0.8670329670329671},
{'a': 0.7930648769574944,
'f1': 0.6435452793834296,
'p': 0.5301587301587302,
'r': 0.8186274509803921},
{'a': 0.7864864864864864,
'f1': 0.6116027531956735,
'p': 0.49053627760252366,
'r': 0.8120104438642297},
{'a': 0.8133442488743349,
'f1': 0.8041237113402061,
'p': 0.740506329113924,
'r': 0.8796992481203008},
{'a': 0.8198999444135631,
'f1': 0.547486033519553,
'p': 0.44144144144144143,
'r': 0.7205882352941176},
{'a': 0.8260297984224365,
'f1': 0.7882666666666666,
'p': 0.7058261700095511,
'r': 0.892512077294686},
{'a': 0.7949640287769785,
'f1': 0.6472148541114058,
'p': 0.5266187050359712,
'r': 0.8394495412844036},
{'a': 0.7809568480300187,
'f1': 0.6785960082587751,
'p': 0.5739231664726426,
'r': 0.82996632996633},
{'a': 0.8590747330960854,
'f1': 0.8381030253475061,
'p': 0.786042944785276,
'r': 0.8975481611208407},
{'a': 0.8205234732031574,
'f1': 0.7925072046109509,
'p': 0.7287985865724381,
'r': 0.868421052631579},
{'a': 0.7914547304170906,
'f1': 0.651360544217687,
'p': 0.5448079658605974,
'r': 0.8097251585623678},
{'a': 0.8573099415204678,
'f1': 0.8013029315960911,
'p': 0.7394789579158316,
'r': 0.8744075829383886},
{'a': 0.7658069883527454,
'f1': 0.7050811943425878,
'p': 0.5945229681978799,
'r': 0.8661518661518661},
{'a': 0.7814530419373893,
'f1': 0.6169772256728778,
'p': 0.494195688225539,
'r': 0.8209366391184573},
{'a': 0.7784026996625422,
'f1': 0.5320665083135392,
'p': 0.4057971014492754,
'r': 0.7724137931034483},
{'a': 0.8084307178631052,
'f1': 0.7649769585253456,
'p': 0.6878453038674033,
'r': 0.8615916955017301},
{'a': 0.8011075219197047,
'f1': 0.7674042093901782,
'p': 0.6732954545454546,
'r': 0.8920953575909661},
{'a': 0.8116760828625236,
'f1': 0.7329773030707609,
'p': 0.6288659793814433,
'r': 0.8784},
{'a': 0.8114992721979621,
'f1': 0.6514131897711979,
'p': 0.545045045045045,
'r': 0.8093645484949833},
{'a': 0.8244174597965211,
'f1': 0.8412933847522991,
'p': 0.7838584853510226,
'r': 0.9078104993597952}],
'www.cnn.com;2010': [{'a': 0.5494505494505495,
'f1': 0.6261398176291794,
'p': 0.46587171052631576,
'r': 0.9545071609098568},
{'a': 0.4099186361875242,
'f1': 0.4241965973534972,
'p': 0.27473065621939274,
'r': 0.9303482587064676},
{'a': 0.5331113577506474,
'f1': 0.5991105463786531,
'p': 0.43758700696055686,
'r': 0.9496475327291037},
{'a': 0.6585735963581184,
'f1': 0.7060360595766919,
'p': 0.5659824046920822,
'r': 0.9381944444444444},
{'a': 0.33472803347280333,
'f1': 0.17723156532988357,
'p': 0.09970887918486172,
'r': 0.7965116279069767},
{'a': 0.3881363848668846,
'f1': 0.3336724313326551,
'p': 0.205,
'r': 0.8961748633879781},
{'a': 0.4603285564487941,
'f1': 0.3949843260188088,
'p': 0.25467407781707935,
'r': 0.8795811518324608},
{'a': 0.4793354943273906,
'f1': 0.5134418780764862,
'p': 0.35294117647058826,
'r': 0.9416666666666667},
{'a': 0.5041876046901173,
'f1': 0.5425038639876353,
'p': 0.381936887921654,
'r': 0.936},
{'a': 0.5866220735785953,
'f1': 0.6679204728640515,
'p': 0.5164104694640631,
'r': 0.9452471482889734},
{'a': 0.34926306386779815,
'f1': 0.2711355677838919,
'p': 0.16064018968583285,
'r': 0.8685897435897436},
{'a': 0.4245134139926355,
'f1': 0.5015945330296128,
'p': 0.3404452690166976,
'r': 0.9524221453287197},
{'a': 0.46664167916041976,
'f1': 0.5064169268123483,
'p': 0.3479504289799809,
'r': 0.9299363057324841},
{'a': 0.5329356201839424,
'f1': 0.6343646623856781,
'p': 0.4719166184134337,
'r': 0.9673590504451038},
{'a': 0.2106007976328316,
'f1': 0.10813953488372094,
'p': 0.05780885780885781,
'r': 0.8359550561797753},
{'a': 0.3769322235434007,
'f1': 0.35361842105263164,
'p': 0.2203997949769349,
'r': 0.893970893970894},
{'a': 0.5659203980099502,
'f1': 0.654284299157999,
'p': 0.49886706948640486,
'r': 0.9503597122302159},
{'a': 0.42406582104902296,
'f1': 0.46188340807174894,
'p': 0.3072006817213464,
'r': 0.9303225806451613},
{'a': 0.65323897659227,
'f1': 0.7479224376731303,
'p': 0.6098741529525653,
'r': 0.9667519181585678},
{'a': 0.3857262452909167,
'f1': 0.3581893724032364,
'p': 0.22219207813347802,
'r': 0.923337091319053},
{'a': 0.1132858837485172,
'f1': 0.07114010562286424,
'p': 0.03716024340770791,
'r': 0.8312159709618875},
{'a': 0.5573596893752206,
'f1': 0.6309593878752207,
'p': 0.4722466960352423,
'r': 0.950354609929078},
{'a': 0.42759795570698467,
'f1': 0.4241645244215938,
'p': 0.27684563758389263,
'r': 0.9065934065934066},
{'a': 0.5144295302013423,
'f1': 0.5789933081175443,
'p': 0.42232597623089985,
'r': 0.9204440333024977},
{'a': 0.422525358516964,
'f1': 0.45457548728113645,
'p': 0.3017543859649123,
'r': 0.9210174029451138}],
'www.cnn.com;2015': [{'a': 0.5471204188481675,
'f1': 0.6247288503253796,
'p': 0.47420417124039516,
'r': 0.9152542372881356},
{'a': 0.31329597554763117,
'f1': 0.3623462630085147,
'p': 0.22784057108863773,
'r': 0.8845265588914549},
{'a': 0.38695163104611924,
'f1': 0.3345543345543346,
'p': 0.21044546850998463,
'r': 0.8154761904761905},
{'a': 0.23784686000417274,
'f1': 0.29844440176685233,
'p': 0.1796116504854369,
'r': 0.8819523269012486},
{'a': 0.2918340026773762,
'f1': 0.3423124740986324,
'p': 0.21223021582733814,
'r': 0.8843683083511777},
{'a': 0.4865805168986084,
'f1': 0.513424399434762,
'p': 0.3684922244759973,
'r': 0.8462732919254659},
{'a': 0.44073455759599334,
'f1': 0.4450579790171175,
'p': 0.30097087378640774,
'r': 0.8538135593220338},
{'a': 0.5405405405405406,
'f1': 0.5957115009746589,
'p': 0.44522144522144524,
'r': 0.8998822143698468},
{'a': 0.22325189553496208,
'f1': 0.2694136291600634,
'p': 0.15891563449404067,
'r': 0.8842652795838751},
{'a': 0.1092596099223632,
'f1': 0.07401574803149606,
'p': 0.039101497504159734,
'r': 0.6911764705882353},
{'a': 0.3040259139287367,
'f1': 0.3345132743362832,
'p': 0.20735052111903457,
'r': 0.8649885583524027},
{'a': 0.24288256227758007,
'f1': 0.07197382769901854,
'p': 0.042801556420233464,
'r': 0.22602739726027396},
{'a': 0.5373284231670572,
'f1': 0.5501302083333334,
'p': 0.400853889943074,
'r': 0.8765560165975104},
{'a': 0.6359154929577465,
'f1': 0.7318464730290457,
'p': 0.5966173361522199,
'r': 0.9463447350771295},
{'a': 0.46788685524126455,
'f1': 0.594059405940594,
'p': 0.430939226519337,
'r': 0.9558823529411765},
{'a': 0.5060650169820475,
'f1': 0.5634648370497427,
'p': 0.4093457943925234,
'r': 0.9037138927097662},
{'a': 0.2033702677746999,
'f1': 0.22848200312989042,
'p': 0.13204134366925063,
'r': 0.8474295190713101},
{'a': 0.4042042042042042,
'f1': 0.3608247422680412,
'p': 0.231980115990058,
'r': 0.8115942028985508},
{'a': 0.5222109189555781,
'f1': 0.5447495961227787,
'p': 0.3904585456229736,
'r': 0.9006410256410257},
{'a': 0.3004646360351058,
'f1': 0.3738447319778188,
'p': 0.23766157461809637,
'r': 0.8755411255411255},
{'a': 0.23959151610369206,
'f1': 0.2525096525096525,
'p': 0.14796380090497738,
'r': 0.8605263157894737},
{'a': 0.6295835922933499,
'f1': 0.7112403100775194,
'p': 0.5723196881091618,
'r': 0.9392194497760716},
{'a': 0.44345794392523363,
'f1': 0.41068777832756054,
'p': 0.2679147837314396,
'r': 0.8792372881355932},
{'a': 0.5817289543223858,
'f1': 0.6741176470588235,
'p': 0.5266544117647058,
'r': 0.9362745098039216},
{'a': 0.46741573033707867,
'f1': 0.4077961019490255,
'p': 0.2732752846617549,
'r': 0.8031496062992126}],
'www.esquire.com;2000': [{'a': 0.9610738255033557,
'f1': 0.9452830188679244,
'p': 0.9488636363636364,
'r': 0.9417293233082706},
{'a': 0.9686935580975317,
'f1': 0.9656084656084656,
'p': 0.964332892998679,
'r': 0.9668874172185431},
{'a': 0.9276447105788423,
'f1': 0.9135360763267739,
'p': 0.8724373576309795,
'r': 0.9586983729662077},
{'a': 0.9525691699604744,
'f1': 0.920704845814978,
'p': 0.9146608315098468,
'r': 0.926829268292683},
{'a': 0.9559808612440192,
'f1': 0.9547689282202556,
'p': 0.9585389930898321,
'r': 0.951028403525955},
{'a': 0.9541984732824428,
'f1': 0.9292730844793713,
'p': 0.923828125,
'r': 0.9347826086956522},
{'a': 0.8704908338261383,
'f1': 0.7753846153846153,
'p': 0.6666666666666666,
'r': 0.9264705882352942},
{'a': 0.9344894026974951,
'f1': 0.9360301034807149,
'p': 0.9103385178408051,
'r': 0.9632139399806389},
{'a': 0.9213615023474179,
'f1': 0.8768382352941175,
'p': 0.8238341968911918,
'r': 0.93713163064833},
{'a': 0.9345150172328902,
'f1': 0.9380530973451326,
'p': 0.9238532110091743,
'r': 0.9526963103122044},
{'a': 0.9562043795620438,
'f1': 0.9343544857768051,
'p': 0.9405286343612335,
'r': 0.9282608695652174},
{'a': 0.9271255060728745,
'f1': 0.9012539184952978,
'p': 0.8582089552238806,
'r': 0.9488448844884488},
{'a': 0.9619429143715573,
'f1': 0.959051724137931,
'p': 0.9580193756727664,
'r': 0.9600862998921251},
{'a': 0.9228130360205832,
'f1': 0.8854961832061068,
'p': 0.8272583201267829,
'r': 0.9525547445255474},
{'a': 0.9208289054197663,
'f1': 0.8948482709950599,
'p': 0.8602442333785617,
'r': 0.9323529411764706},
{'a': 0.9557589626239512,
'f1': 0.9271356783919598,
'p': 0.9201995012468828,
'r': 0.9341772151898734},
{'a': 0.9644110275689223,
'f1': 0.9670227589410124,
'p': 0.9701770736253494,
'r': 0.9638888888888889},
{'a': 0.9276642738818333,
'f1': 0.9087108013937282,
'p': 0.8728246318607764,
'r': 0.9476744186046512},
{'a': 0.9042735042735043,
'f1': 0.8458715596330274,
'p': 0.7760942760942761,
'r': 0.9294354838709677},
{'a': 0.8346007604562737,
'f1': 0.6329113924050633,
'p': 0.4891304347826087,
'r': 0.896414342629482},
{'a': 0.9553314121037464,
'f1': 0.9345991561181434,
'p': 0.9425531914893617,
'r': 0.9267782426778243},
{'a': 0.9652817302219693,
'f1': 0.963841138114997,
'p': 0.9621301775147929,
'r': 0.9655581947743468},
{'a': 0.9293800539083558,
'f1': 0.9080701754385965,
'p': 0.8649732620320856,
'r': 0.9556868537666174},
{'a': 0.1,
'f1': 0.09999999999999999,
'p': 0.05555555555555555,
'r': 0.5},
{'a': 0.919586444572085,
'f1': 0.8780487804878049,
'p': 0.8275862068965517,
'r': 0.935064935064935}],
'www.esquire.com;2005': [{'a': 0.9534395973154363,
'f1': 0.9468645284825277,
'p': 0.9620622568093385,
'r': 0.9321394910461829},
{'a': 0.9485544217687075,
'f1': 0.9395302348825587,
'p': 0.962128966223132,
'r': 0.91796875},
{'a': 0.9613180515759312,
'f1': 0.9429978888106968,
'p': 0.9463276836158192,
'r': 0.9396914446002805},
{'a': 0.9636363636363636,
'f1': 0.9542857142857142,
'p': 0.9553775743707094,
'r': 0.9531963470319634},
{'a': 0.9630901287553648,
'f1': 0.9570858283433133,
'p': 0.9609218436873748,
'r': 0.9532803180914513},
{'a': 0.918958031837916,
'f1': 0.8773722627737227,
'p': 0.8812316715542522,
'r': 0.873546511627907},
{'a': 0.8950998185117968,
'f1': 0.9101647497668636,
'p': 0.8496807893209518,
'r': 0.9799196787148594},
{'a': 0.9192789968652038,
'f1': 0.9053308823529411,
'p': 0.8771148708815673,
'r': 0.9354226020892688},
{'a': 0.9599483204134367,
'f1': 0.9531957725213891,
'p': 0.960446247464503,
'r': 0.9460539460539461},
{'a': 0.9651855245075585,
'f1': 0.9563218390804596,
'p': 0.9530355097365406,
'r': 0.9596309111880046},
{'a': 0.9460927705808608,
'f1': 0.9340827797649464,
'p': 0.9260385005065856,
'r': 0.9422680412371134},
{'a': 0.9168377823408624,
'f1': 0.8620102214650767,
'p': 0.8295081967213115,
'r': 0.8971631205673759},
{'a': 0.8992084432717679,
'f1': 0.813658536585366,
'p': 0.7679558011049724,
'r': 0.8651452282157677},
{'a': 0.8835955056179775,
'f1': 0.8746976294146106,
'p': 0.8166214995483289,
'r': 0.9416666666666667},
{'a': 0.9610503282275711,
'f1': 0.9533787323205867,
'p': 0.958904109589041,
'r': 0.9479166666666666},
{'a': 0.8831544178364987,
'f1': 0.8569984840828703,
'p': 0.7977422389463782,
'r': 0.925764192139738},
{'a': 0.9586449626044875,
'f1': 0.9499467518636849,
'p': 0.958109559613319,
'r': 0.941921858500528},
{'a': 0.9644351464435147,
'f1': 0.96,
'p': 0.9631728045325779,
'r': 0.9568480300187617},
{'a': 0.9675629534784465,
'f1': 0.9627450980392156,
'p': 0.9618021547502449,
'r': 0.9636898920510304},
{'a': 0.9702861335289802,
'f1': 0.9710610932475884,
'p': 0.9721030042918455,
'r': 0.9700214132762313},
{'a': 0.9470559129143988,
'f1': 0.9219547775346462,
'p': 0.9418777943368107,
'r': 0.9028571428571428},
{'a': 0.948937908496732,
'f1': 0.943155979990905,
'p': 0.9308797127468582,
'r': 0.9557603686635945},
{'a': 0.9545071609098568,
'f1': 0.9477250726040658,
'p': 0.9626352015732547,
'r': 0.9332697807435653},
{'a': 0.9579974543911752,
'f1': 0.9514943655071044,
'p': 0.9623389494549058,
'r': 0.9408914728682171},
{'a': 0.9658650116369278,
'f1': 0.9645732689210951,
'p': 0.9684721099434115,
'r': 0.9607056936647955}],
'www.esquire.com;2010': [{'a': 0.46971279373368147,
'f1': 0.40596665691722733,
'p': 0.26387832699619773,
'r': 0.8795944233206591},
{'a': 0.6291847183930681,
'f1': 0.6925714285714286,
'p': 0.5470724787206603,
'r': 0.9435053380782918},
{'a': 0.43822393822393824,
'f1': 0.2085222121486854,
'p': 0.11954261954261955,
'r': 0.8156028368794326},
{'a': 0.5483954451345756,
'f1': 0.5367666578178922,
'p': 0.381797583081571,
'r': 0.903485254691689},
{'a': 0.5808936825885979,
'f1': 0.5828220858895705,
'p': 0.4314912944738834,
'r': 0.8976377952755905},
{'a': 0.5229328165374677,
'f1': 0.42551536367172305,
'p': 0.2812339331619537,
'r': 0.8738019169329073},
{'a': 0.4098018769551616,
'f1': 0.2907268170426065,
'p': 0.17417417417417416,
'r': 0.8787878787878788},
{'a': 0.5787373004354136,
'f1': 0.557039297977871,
'p': 0.40286975717439294,
'r': 0.9023485784919654},
{'a': 0.5422609614368726,
'f1': 0.5276642136822022,
'p': 0.36960672012218404,
'r': 0.9219047619047619},
{'a': 0.5766440439971917,
'f1': 0.59484882418813,
'p': 0.44340567612687815,
'r': 0.9034013605442177},
{'a': 0.5088725088725089,
'f1': 0.4533576420540869,
'p': 0.306869600987248,
'r': 0.8674418604651163},
{'a': 0.5353200883002207,
'f1': 0.5014801657785671,
'p': 0.34784394250513345,
'r': 0.8981972428419936},
{'a': 0.5931351114997814,
'f1': 0.6309736268094388,
'p': 0.4803743961352657,
'r': 0.9191218948584633},
{'a': 0.4420245398773006,
'f1': 0.2852652259332023,
'p': 0.17098445595854922,
'r': 0.8601895734597157},
{'a': 0.5651650204769935,
'f1': 0.5832371276841376,
'p': 0.42683338965866846,
'r': 0.9205539358600583},
{'a': 0.42402574186628533,
'f1': 0.19085886489201404,
'p': 0.10764872521246459,
'r': 0.8407079646017699},
{'a': 0.34157875591124043,
'f1': 0.16435826408125578,
'p': 0.09067753438614366,
'r': 0.8768472906403941},
{'a': 0.489355497229513,
'f1': 0.4711567502265176,
'p': 0.31733116354759966,
'r': 0.9144196951934349},
{'a': 0.5622435020519836,
'f1': 0.5500562429696287,
'p': 0.39788445890968266,
'r': 0.8907103825136612},
{'a': 0.6142929548910289,
'f1': 0.6501149425287357,
'p': 0.5001768659356208,
'r': 0.9284307288246881},
{'a': 0.3872325499824623,
'f1': 0.2569119523606975,
'p': 0.15024875621890546,
'r': 0.8856304985337243},
{'a': 0.36039188243526943,
'f1': 0.23833333333333334,
'p': 0.1370388116914231,
'r': 0.9137380191693291},
{'a': 0.6539984164687253,
'f1': 0.7313249308330771,
'p': 0.5959418837675351,
'r': 0.9463007159904535},
{'a': 0.5495602567149989,
'f1': 0.5723313021891221,
'p': 0.4143790849673203,
'r': 0.924872355944566},
{'a': 0.5929073856975381,
'f1': 0.6246960281005133,
'p': 0.46991869918699186,
'r': 0.9315068493150684}],
'www.esquire.com;2015': [{'a': 0.27425818147123365,
'f1': 0.014382896015549079,
'p': 0.007266300078554596,
'r': 0.6981132075471698},
{'a': 0.2793508936970837,
'f1': 0.0336802270577105,
'p': 0.0172335893629381,
'r': 0.7375690607734806},
{'a': 0.2674749005493465,
'f1': 0.014525993883792047,
'p': 0.007333547764554519,
'r': 0.7549668874172185},
{'a': 0.42544974082731984,
'f1': 0.07645809508250286,
'p': 0.04026152787336545,
'r': 0.7572815533980582},
{'a': 0.30717893217893216,
'f1': 0.13187930839642897,
'p': 0.07145481263776635,
'r': 0.8543191800878477},
{'a': 0.32544611616515046,
'f1': 0.18967057216413596,
'p': 0.10621395786748264,
'r': 0.8852378616969102},
{'a': 0.27663516768575136,
'f1': 0.060409145607701574,
'p': 0.031347570875483954,
'r': 0.8283828382838284},
{'a': 0.27906207157377205,
'f1': 0.03199187507934493,
'p': 0.016350895406177006,
'r': 0.7368421052631579},
{'a': 0.3662221537038465,
'f1': 0.17300341983504328,
'p': 0.09600357222594329,
'r': 0.8739837398373984},
{'a': 0.48735537712587723,
'f1': 0.5452965091826721,
'p': 0.383287347260544,
'r': 0.9445361826129189},
{'a': 0.2843311506080449,
'f1': 0.051806407634628494,
'p': 0.02677599128819422,
'r': 0.7946768060836502},
{'a': 0.3768851837817355,
'f1': 0.21033422973492125,
'p': 0.11951538965291421,
'r': 0.876},
{'a': 0.28249249812453114,
'f1': 0.045292906606775214,
'p': 0.023333547599151506,
'r': 0.7690677966101694},
{'a': 0.3012342330123423,
'f1': 0.11548586471328831,
'p': 0.0620159803318992,
'r': 0.8380398671096345},
{'a': 0.29244199402710774,
'f1': 0.08431442502081103,
'p': 0.044404083422058,
'r': 0.8331374853113983},
{'a': 0.3413680261792641,
'f1': 0.09497696863347226,
'p': 0.050354692406093735,
'r': 0.8342967244701349},
{'a': 0.28442692199380454,
'f1': 0.05304347826086956,
'p': 0.027405172967075283,
'r': 0.8227360308285164},
{'a': 0.27631578947368424,
'f1': 0.024413503719244705,
'p': 0.01240791004265219,
'r': 0.7529411764705882},
{'a': 0.3443384886609504,
'f1': 0.09068681929317628,
'p': 0.04797178130511464,
'r': 0.8275862068965517},
{'a': 0.31721427309662603,
'f1': 0.16530799546509745,
'p': 0.09126132570338578,
'r': 0.8763594733829422},
{'a': 0.27594483927142655,
'f1': 0.023362112747587607,
'p': 0.011870201922456615,
'r': 0.7330677290836654},
{'a': 0.40731201259211564,
'f1': 0.303397241843256,
'p': 0.18236959158916297,
'r': 0.902},
{'a': 0.3228562709795545,
'f1': 0.18380537018548684,
'p': 0.10276749515247664,
'r': 0.8692842942345924},
{'a': 0.3184858154478297,
'f1': 0.17196877157576076,
'p': 0.0955331327078539,
'r': 0.8602550478214666},
{'a': 0.3326267731941953,
'f1': 0.05799769850402761,
'p': 0.03003933722732149,
'r': 0.8372093023255814},
{'a': 0.2796709753231492,
'f1': 0.034524034524034526,
'p': 0.017673998580919822,
'r': 0.7405405405405405}],
'www.forbes.com;2000': [{'a': 0.7463186077643909,
'f1': 0.7745389649018442,
'p': 0.6458333333333334,
'r': 0.9673105497771174},
{'a': 0.7251631617113851,
'f1': 0.7444369521240729,
'p': 0.605927552140505,
'r': 0.965034965034965},
{'a': 0.8685196752029981,
'f1': 0.9172725486343092,
'p': 0.8670133729569094,
'r': 0.9737171464330413},
{'a': 0.7623066104078763,
'f1': 0.7911001236093943,
'p': 0.6708595387840671,
'r': 0.963855421686747},
{'a': 0.6395851339671564,
'f1': 0.4945454545454546,
'p': 0.3434343434343434,
'r': 0.8831168831168831},
{'a': 0.7552631578947369,
'f1': 0.757496740547588,
'p': 0.6579841449603624,
'r': 0.8924731182795699},
{'a': 0.807836822329576,
'f1': 0.8562248995983935,
'p': 0.7674586033117351,
'r': 0.9682107175295186},
{'a': 0.7013473053892215,
'f1': 0.6815642458100558,
'p': 0.5344180225281602,
'r': 0.9405286343612335},
{'a': 0.8113305613305614,
'f1': 0.8603308964986534,
'p': 0.7710344827586207,
'r': 0.9730200174064404},
{'a': 0.9313490158425348,
'f1': 0.9560945655511206,
'p': 0.9278903456495828,
'r': 0.9860671310956302},
{'a': 0.700493305144468,
'f1': 0.7091033538672142,
'p': 0.5704845814977973,
'r': 0.9367088607594937},
{'a': 0.8956254796623178,
'f1': 0.933300637567435,
'p': 0.8888369920597852,
'r': 0.982447083118224},
{'a': 0.6883116883116883,
'f1': 0.6660869565217391,
'p': 0.5140939597315436,
'r': 0.945679012345679},
{'a': 0.705710102489019,
'f1': 0.6926605504587156,
'p': 0.552439024390244,
'r': 0.9282786885245902},
{'a': 0.624633431085044,
'f1': 0.43529411764705883,
'p': 0.31092436974789917,
'r': 0.7254901960784313},
{'a': 0.7604284103720406,
'f1': 0.8098434004474274,
'p': 0.7042801556420234,
'r': 0.9526315789473684},
{'a': 0.7866458007303078,
'f1': 0.8381480015829047,
'p': 0.7405594405594406,
'r': 0.9653600729261622},
{'a': 0.7743128964059197,
'f1': 0.8273352203801051,
'p': 0.7296718972895863,
'r': 0.9551820728291317},
{'a': 0.7612748045700541,
'f1': 0.7983748095479939,
'p': 0.6852659110723627,
'r': 0.9562043795620438},
{'a': 0.5644916540212443,
'f1': 0.29310344827586204,
'p': 0.17708333333333334,
'r': 0.85},
{'a': 0.9127272727272727,
'f1': 0.84,
'p': 0.7777777777777778,
'r': 0.9130434782608695},
{'a': 0.7091172214182344,
'f1': 0.6995515695067265,
'p': 0.5571428571428572,
'r': 0.9397590361445783},
{'a': 0.8019145802650958,
'f1': 0.829854522454143,
'p': 0.7337807606263982,
'r': 0.9548762736535662},
{'a': 0.7129888268156425,
'f1': 0.7363694676074407,
'p': 0.5985401459854015,
'r': 0.9566666666666667},
{'a': 0.8698095535435529,
'f1': 0.9182192586781721,
'p': 0.8635189966801918,
'r': 0.9803182579564489},
{'a': 0.7464406779661017,
'f1': 0.7794811320754718,
'p': 0.6590229312063809,
'r': 0.9538239538239538},
{'a': 0.7345767575322812,
'f1': 0.7226386806596701,
'p': 0.5863746958637469,
'r': 0.94140625},
{'a': 0.8735135135135135,
'f1': 0.9135893648449039,
'p': 0.8554633471645919,
'r': 0.9801901743264659}],
'www.forbes.com;2005': [{'a': 0.6573502722323049,
'f1': 0.2601880877742947,
'p': 0.15485074626865672,
'r': 0.8137254901960784},
{'a': 0.7343458566480149,
'f1': 0.6780755569906362,
'p': 0.5373592630501536,
'r': 0.9186351706036745},
{'a': 0.6820128479657388,
'f1': 0.3816793893129771,
'p': 0.24752475247524752,
'r': 0.8333333333333334},
{'a': 0.6730881494454174,
'f1': 0.4800371402042712,
'p': 0.3288804071246819,
'r': 0.8883161512027491},
{'a': 0.7079758500158881,
'f1': 0.5575349061145883,
'p': 0.39766483516483514,
'r': 0.9323671497584541},
{'a': 0.7032007759456838,
'f1': 0.5586538461538462,
'p': 0.39903846153846156,
'r': 0.9310897435897436},
{'a': 0.6664367023111418,
'f1': 0.337217272104181,
'p': 0.2120689655172414,
'r': 0.822742474916388},
{'a': 0.7016661427224143,
'f1': 0.5266832917705736,
'p': 0.3715693173821253,
'r': 0.9041095890410958},
{'a': 0.7392296718972896,
'f1': 0.6783954961294862,
'p': 0.5253405994550409,
'r': 0.9572989076464746},
{'a': 0.5082893745290128,
'f1': 0.0867739678096571,
'p': 0.04599406528189911,
'r': 0.7654320987654321},
{'a': 0.6257179323548181,
'f1': 0.2980251346499102,
'p': 0.1822840409956076,
'r': 0.8163934426229508},
{'a': 0.6676279740447008,
'f1': 0.33091436865021767,
'p': 0.20670897552130554,
'r': 0.8290909090909091},
{'a': 0.6678069175218548,
'f1': 0.23467600700525396,
'p': 0.13814432989690723,
'r': 0.7790697674418605},
{'a': 0.7126618250710451,
'f1': 0.5759552656104381,
'p': 0.42155525238744884,
'r': 0.9088235294117647},
{'a': 0.7034482758620689,
'f1': 0.5571161048689138,
'p': 0.40669856459330145,
'r': 0.8841010401188707},
{'a': 0.6676703048550997,
'f1': 0.29076305220883536,
'p': 0.1753875968992248,
'r': 0.8497652582159625},
{'a': 0.7393617021276596,
'f1': 0.5967078189300411,
'p': 0.441400304414003,
'r': 0.9206349206349206},
{'a': 0.7517774343122102,
'f1': 0.6590233545647559,
'p': 0.5091863517060368,
'r': 0.9338146811070999},
{'a': 0.7175792507204611,
'f1': 0.6168881939014855,
'p': 0.4668639053254438,
'r': 0.9089861751152074},
{'a': 0.7205791743684535,
'f1': 0.6037570991699432,
'p': 0.44841012329656066,
'r': 0.9237967914438503},
{'a': 0.6773299748110831,
'f1': 0.5809617271835132,
'p': 0.4275397207510833,
'r': 0.9061224489795918},
{'a': 0.6985084100285623,
'f1': 0.4973544973544974,
'p': 0.34814814814814815,
'r': 0.8703703703703703},
{'a': 0.7009054011863878,
'f1': 0.5438095238095239,
'p': 0.38896457765667575,
'r': 0.9034810126582279},
{'a': 0.7513007284079084,
'f1': 0.7188235294117649,
'p': 0.5841300191204589,
'r': 0.9342507645259939},
{'a': 0.7075163398692811,
'f1': 0.6288873531444368,
'p': 0.47321892875715027,
'r': 0.937178166838311}],
'www.forbes.com;2010': [{'a': 0.6800670016750419,
'f1': 0.3858520900321543,
'p': 0.27713625866050806,
'r': 0.6349206349206349},
{'a': 0.7875422501207147,
'f1': 0.8056537102473499,
'p': 0.7042471042471042,
'r': 0.9411764705882353},
{'a': 0.7325049051667757,
'f1': 0.6730615507593924,
'p': 0.529559748427673,
'r': 0.9232456140350878},
{'a': 0.7314510833880499,
'f1': 0.689445709946849,
'p': 0.551640340218712,
'r': 0.9190283400809717},
{'a': 0.8021814576103123,
'f1': 0.823529411764706,
'p': 0.7336485421591804,
'r': 0.938508064516129},
{'a': 0.741650913673598,
'f1': 0.7079772079772079,
'p': 0.5819672131147541,
'r': 0.9036363636363637},
{'a': 0.7209994051160024,
'f1': 0.6304176516942475,
'p': 0.49813200498132004,
'r': 0.8583690987124464},
{'a': 0.7506987143655673,
'f1': 0.755750273822563,
'p': 0.6210621062106211,
'r': 0.965034965034965},
{'a': 0.7306338028169014,
'f1': 0.7189222290263318,
'p': 0.5875875875875876,
'r': 0.9258675078864353},
{'a': 0.8244776119402986,
'f1': 0.7402826855123675,
'p': 0.6476043276661515,
'r': 0.8639175257731959},
{'a': 0.8683333333333333,
'f1': 0.8603417796110784,
'p': 0.7891891891891892,
'r': 0.9455958549222798},
{'a': 0.7801792303637322,
'f1': 0.7928464977645305,
'p': 0.6849785407725322,
'r': 0.9410377358490566},
{'a': 0.7241821680564464,
'f1': 0.689306358381503,
'p': 0.5438996579247435,
'r': 0.9408284023668639},
{'a': 0.7560414269275029,
'f1': 0.7476190476190476,
'p': 0.6217821782178218,
'r': 0.9373134328358209},
{'a': 0.7150537634408602,
'f1': 0.6809364548494983,
'p': 0.5391949152542372,
'r': 0.9237749546279492},
{'a': 0.6862021227503461,
'f1': 0.7183098591549295,
'p': 0.5753151957531519,
'r': 0.9558985667034179},
{'a': 0.7379713914174252,
'f1': 0.6907137375287798,
'p': 0.5501222493887531,
'r': 0.9278350515463918},
{'a': 0.742494996664443,
'f1': 0.7044410413476263,
'p': 0.5665024630541872,
'r': 0.9311740890688259},
{'a': 0.6059735784032165,
'f1': 0.561941251596424,
'p': 0.4010938924339107,
'r': 0.9381663113006397},
{'a': 0.6972809667673716,
'f1': 0.6561427590940289,
'p': 0.5042194092827004,
'r': 0.9390962671905697},
{'a': 0.7626683771648493,
'f1': 0.744121715076072,
'p': 0.6127562642369021,
'r': 0.9471830985915493},
{'a': 0.541839762611276,
'f1': 0.1804670912951168,
'p': 0.1022864019253911,
'r': 0.7657657657657657},
{'a': 0.5964601769911504,
'f1': 0.3473282442748092,
'p': 0.2153846153846154,
'r': 0.896551724137931},
{'a': 0.7361899845121321,
'f1': 0.7402135231316725,
'p': 0.6112510495382032,
'r': 0.9381443298969072},
{'a': 0.7512531328320802,
'f1': 0.735861610113107,
'p': 0.6043715846994535,
'r': 0.9404761904761905}],
'www.forbes.com;2015': [{'a': 0.46043498380379455,
'f1': 0.5317269076305221,
'p': 0.3688022284122563,
'r': 0.9525179856115108},
{'a': 0.3647180548370409,
'f1': 0.3785425101214575,
'p': 0.2380649267982177,
'r': 0.9234567901234568},
{'a': 0.34372003835091086,
'f1': 0.35697510568341945,
'p': 0.2210587550901687,
'r': 0.926829268292683},
{'a': 0.36551368094992254,
'f1': 0.40713941148094546,
'p': 0.25794621026894865,
'r': 0.965675057208238},
{'a': 0.374563784412563,
'f1': 0.43778319972115715,
'p': 0.2841628959276018,
'r': 0.952959028831563},
{'a': 0.42162396873643077,
'f1': 0.491214667685256,
'p': 0.33042137718396714,
'r': 0.9568452380952381},
{'a': 0.4944915254237288,
'f1': 0.5841756709654932,
'p': 0.41962944416624937,
'r': 0.9610091743119266},
{'a': 0.39051841746248295,
'f1': 0.472082717872969,
'p': 0.31333333333333335,
'r': 0.9568862275449102},
{'a': 0.3383084577114428,
'f1': 0.36363636363636365,
'p': 0.22570194384449244,
'r': 0.9351230425055929},
{'a': 0.21129220023282888,
'f1': 0.2144927536231884,
'p': 0.12259774685222001,
'r': 0.8564814814814815},
{'a': 0.30982658959537573,
'f1': 0.2565379825653798,
'p': 0.15047479912344777,
'r': 0.869198312236287},
{'a': 0.47364370911889186,
'f1': 0.5665399239543727,
'p': 0.4037940379403794,
'r': 0.9490445859872612},
{'a': 0.4983351831298557,
'f1': 0.6,
'p': 0.43554603854389723,
'r': 0.9639810426540284},
{'a': 0.46600741656365885,
'f1': 0.5531034482758621,
'p': 0.3889427740058196,
'r': 0.9570405727923628},
{'a': 0.3760532150776053,
'f1': 0.43742502998800475,
'p': 0.28239545689210116,
'r': 0.9698581560283688},
{'a': 0.3777668470241023,
'f1': 0.4080486663547028,
'p': 0.26139088729016785,
'r': 0.929637526652452},
{'a': 0.3453741245853299,
'f1': 0.3991880920162382,
'p': 0.25267665952890794,
'r': 0.9500805152979066},
{'a': 0.3266555370061213,
'f1': 0.2965116279069767,
'p': 0.17782426778242677,
'r': 0.8916083916083916},
{'a': 0.3740053050397878,
'f1': 0.4234527687296417,
'p': 0.27325275880189176,
'r': 0.9403254972875226},
{'a': 0.4581497797356828,
'f1': 0.5473402475744397,
'p': 0.3827795975666823,
'r': 0.960093896713615},
{'a': 0.27863163113534956,
'f1': 0.2289348171701113,
'p': 0.13122721749696234,
'r': 0.8962655601659751},
{'a': 0.41493383742911155,
'f1': 0.46866952789699573,
'p': 0.31217838765008576,
'r': 0.9397590361445783},
{'a': 0.19070208728652752,
'f1': 0.08081896551724138,
'p': 0.042492917847025496,
'r': 0.8241758241758241},
{'a': 0.4654213036565978,
'f1': 0.5559590623968306,
'p': 0.391810144253141,
'r': 0.9568181818181818},
{'a': 0.41478636581853096,
'f1': 0.46838203227213265,
'p': 0.3111239860950174,
'r': 0.9470899470899471}],
'www.foxnews.com;2000': [{'a': 0.9328201539538139,
'f1': 0.8940397350993378,
'p': 0.8472803347280334,
'r': 0.9462616822429907},
{'a': 0.8866481223922114,
'f1': 0.7323481116584564,
'p': 0.6076294277929155,
'r': 0.9214876033057852},
{'a': 0.9392950391644909,
'f1': 0.9167412712623098,
'p': 0.8797250859106529,
'r': 0.9570093457943926},
{'a': 0.8789237668161435,
'f1': 0.8645484949832776,
'p': 0.8027950310559007,
'r': 0.9365942028985508},
{'a': 0.9260485651214128,
'f1': 0.8494382022471911,
'p': 0.7777777777777778,
'r': 0.9356435643564357},
{'a': 0.9224841341795105,
'f1': 0.939936775553214,
'p': 0.9034436191762323,
'r': 0.9795021961932651},
{'a': 0.7778364116094987,
'f1': 0.7885484681064792,
'p': 0.6715141146278871,
'r': 0.9549878345498783},
{'a': 0.9259950248756219,
'f1': 0.9037995149555377,
'p': 0.8482549317147192,
'r': 0.967128027681661},
{'a': 0.9148550724637681,
'f1': 0.9316661820296599,
'p': 0.8875346260387812,
'r': 0.9804161566707467},
{'a': 0.7328767123287672,
'f1': 0.6481609993060375,
'p': 0.49417989417989416,
'r': 0.9415322580645161},
{'a': 0.9409317803660566,
'f1': 0.8536082474226805,
'p': 0.7781954887218046,
'r': 0.9452054794520548},
{'a': 0.8669673055242391,
'f1': 0.8244047619047619,
'p': 0.725130890052356,
'r': 0.9551724137931035},
{'a': 0.9381598793363499,
'f1': 0.8857938718662952,
'p': 0.8457446808510638,
'r': 0.9298245614035088},
{'a': 0.9487719298245614,
'f1': 0.9205658324265505,
'p': 0.8794178794178794,
'r': 0.9657534246575342},
{'a': 0.25,
'f1': 0.21052631578947367,
'p': 0.125,
'r': 0.6666666666666666},
{'a': 0.8834038551951104,
'f1': 0.8779527559055118,
'p': 0.8043282236248873,
'r': 0.9664138678223185},
{'a': 0.90527950310559,
'f1': 0.900489396411093,
'p': 0.8523780111179741,
'r': 0.9543568464730291},
{'a': 0.9191583610188261,
'f1': 0.9093167701863353,
'p': 0.8776978417266187,
'r': 0.9432989690721649},
{'a': 0.9488078541374474,
'f1': 0.9215896885069816,
'p': 0.8755102040816326,
'r': 0.9727891156462585},
{'a': 0.9846780316101871,
'f1': 0.9917606888361045,
'p': 0.9949363318191973,
'r': 0.9886052534221236},
{'a': 0.9455314648334214,
'f1': 0.9438079650845608,
'p': 0.9211927582534611,
'r': 0.9675615212527964},
{'a': 0.9069767441860465,
'f1': 0.9270833333333334,
'p': 0.8933500627352572,
'r': 0.9634641407307172},
{'a': 0.9575163398692811,
'f1': 0.9686746987951808,
'p': 0.958156779661017,
'r': 0.9794260963724959},
{'a': 0.926481084939329,
'f1': 0.8781065088757397,
'p': 0.8171806167400881,
'r': 0.948849104859335},
{'a': 0.953526517222526,
'f1': 0.9404344779257182,
'p': 0.9255172413793104,
'r': 0.9558404558404558}],
'www.foxnews.com;2005': [{'a': 0.603609410248147,
'f1': 0.6689989235737351,
'p': 0.5077614379084967,
'r': 0.9802839116719243},
{'a': 0.44011976047904194,
'f1': 0.4590163934426229,
'p': 0.30164765525982257,
'r': 0.9596774193548387},
{'a': 0.4680177327422419,
'f1': 0.5053003533568905,
'p': 0.3436123348017621,
'r': 0.9543937708565072},
{'a': 0.47722132471728596,
'f1': 0.5135297654840649,
'p': 0.3531844499586435,
'r': 0.9405286343612335},
{'a': 0.8057761732851986,
'f1': 0.003703703703703704,
'p': 1.0,
'r': 0.0018552875695732839},
{'a': 0.88042203985932,
'f1': 0.006493506493506494,
'p': 1.0,
'r': 0.003257328990228013},
{'a': 0.5524052245938197,
'f1': 0.6109111049570757,
'p': 0.44710174300770167,
'r': 0.9641608391608392},
{'a': 0.8762091202210963,
'f1': 0.0018570102135561746,
'p': 1.0,
'r': 0.0009293680297397769},
{'a': 0.7714677983025462,
'f1': 0.8519925632527686,
'p': 0.7521050378193236,
'r': 0.982475764354959},
{'a': 0.5255948089401586,
'f1': 0.3037037037037037,
'p': 0.182453909726637,
'r': 0.9053627760252366},
{'a': 0.5425170068027211,
'f1': 0.5509181969949917,
'p': 0.38686987104337633,
'r': 0.9565217391304348},
{'a': 0.8503401360544217,
'f1': 0.0064516129032258064,
'p': 1.0,
'r': 0.003236245954692557},
{'a': 0.8011192423590185,
'f1': 0.004310344827586208,
'p': 1.0,
'r': 0.0021598272138228943},
{'a': 0.3963254593175853,
'f1': 0.2930327868852459,
'p': 0.1753525444512569,
'r': 0.8909657320872274},
{'a': 0.4789915966386555,
'f1': 0.5162064825930371,
'p': 0.3543469303667079,
'r': 0.9502762430939227},
{'a': 0.4625651041666667,
'f1': 0.492156259612427,
'p': 0.33222591362126247,
'r': 0.9489916963226572},
{'a': 0.46634615384615385,
'f1': 0.502539587690469,
'p': 0.33966074313408723,
'r': 0.965556831228473},
{'a': 0.5385525537369356,
'f1': 0.0008539709649871905,
'p': 1.0,
'r': 0.00042716787697565144},
{'a': 0.8520428401428005,
'f1': 0.005333333333333333,
'p': 1.0,
'r': 0.00267379679144385},
{'a': 0.4066265060240964,
'f1': 0.42058823529411765,
'p': 0.2712443095599393,
'r': 0.9358638743455497},
{'a': 0.6719000320410125,
'f1': 0.7054085155350976,
'p': 0.5610983981693364,
'r': 0.9496514329976762},
{'a': 0.4648160103292447,
'f1': 0.49420378279438676,
'p': 0.334020618556701,
'r': 0.9495896834701055},
{'a': 0.39645693341478316,
'f1': 0.4090909090909091,
'p': 0.26047220106626046,
'r': 0.9526462395543176},
{'a': 0.5206232813932172,
'f1': 0.5149922720247295,
'p': 0.3532654792196777,
'r': 0.9498289623717218},
{'a': 0.4681126578180641,
'f1': 0.4986267927982911,
'p': 0.33788254755996694,
'r': 0.9511059371362048}],
'www.foxnews.com;2010': [{'a': 0.6598122485471614,
'f1': 0.29861751152073734,
'p': 0.18346545866364666,
'r': 0.801980198019802},
{'a': 0.40350373348650204,
'f1': 0.4678452472457084,
'p': 0.3106498809118748,
'r': 0.9470954356846473},
{'a': 0.20753424657534247,
'f1': 0.16622627912563054,
'p': 0.09146180280200898,
'r': 0.9105263157894737},
{'a': 0.6648550724637681,
'f1': 0.26441351888667997,
'p': 0.15890083632019117,
'r': 0.7869822485207101},
{'a': 0.6775615130370914,
'f1': 0.5039548022598871,
'p': 0.3573717948717949,
'r': 0.8544061302681992},
{'a': 0.38892145369284875,
'f1': 0.4459208078660643,
'p': 0.2909153952843273,
'r': 0.9544937428896473},
{'a': 0.3192503176620076,
'f1': 0.328002508623393,
'p': 0.1987082066869301,
'r': 0.9389587073608617},
{'a': 0.3446740858505564,
'f1': 0.36798528058877644,
'p': 0.22839741149600304,
'r': 0.9463722397476341},
{'a': 0.6619359534206696,
'f1': 0.47365439093484424,
'p': 0.3247863247863248,
'r': 0.8744769874476988},
{'a': 0.3651210265383494,
'f1': 0.41241565452091766,
'p': 0.26481802426343154,
'r': 0.9317073170731708},
{'a': 0.281777471581123,
'f1': 0.24209378407851692,
'p': 0.13985720285594289,
'r': 0.9},
{'a': 0.26661926768574473,
'f1': 0.20500963391136803,
'p': 0.11575282854656223,
'r': 0.8956228956228957},
{'a': 0.6597249508840864,
'f1': 0.412483039348711,
'p': 0.2773722627737226,
'r': 0.8042328042328042},
{'a': 0.7581721147431622,
'f1': 0.7157977263818112,
'p': 0.5818992989165074,
'r': 0.929735234215886},
{'a': 0.7092941998602376,
'f1': 0.6093896713615023,
'p': 0.458981612446959,
'r': 0.9064245810055865},
{'a': 0.6978047641289117,
'f1': 0.30802139037433157,
'p': 0.18972332015810275,
'r': 0.8181818181818182},
{'a': 0.34171322160148976,
'f1': 0.36893781612615295,
'p': 0.22954461310625693,
'r': 0.9393939393939394},
{'a': 0.7627029858564693,
'f1': 0.7772861356932154,
'p': 0.6598497495826378,
'r': 0.9455741626794258},
{'a': 0.6573426573426573,
'f1': 0.30742049469964666,
'p': 0.19205298013245034,
'r': 0.7699115044247787},
{'a': 0.7223642172523962,
'f1': 0.6686999618757148,
'p': 0.5257793764988009,
'r': 0.9183246073298429},
{'a': 0.6867469879518072,
'f1': 0.44963503649635034,
'p': 0.3040473840078973,
'r': 0.8627450980392157},
{'a': 0.29747899159663865,
'f1': 0.2753120665742025,
'p': 0.16164495114006514,
'r': 0.927570093457944},
{'a': 0.7348339280812517,
'f1': 0.7353424657534247,
'p': 0.6077898550724637,
'r': 0.9306518723994452},
{'a': 0.2661596958174905,
'f1': 0.2226290735994141,
'p': 0.1267723102585488,
'r': 0.9129129129129129},
{'a': 0.7271911456775352,
'f1': 0.7011795543905636,
'p': 0.5685441020191286,
'r': 0.9145299145299145}],
'www.foxnews.com;2015': [{'a': 0.8228247162673392,
'f1': 0.8672649976381671,
'p': 0.8024475524475524,
'r': 0.9434737923946557},
{'a': 0.3855302279484638,
'f1': 0.4718909710391823,
'p': 0.3162100456621005,
'r': 0.9295302013422819},
{'a': 0.7244318181818182,
'f1': 0.5109243697478992,
'p': 0.37530864197530867,
'r': 0.8},
{'a': 0.6683467741935484,
'f1': 0.5923172242874845,
'p': 0.44014732965009207,
'r': 0.9053030303030303},
{'a': 0.7728337236533958,
'f1': 0.7236467236467238,
'p': 0.5971786833855799,
'r': 0.9180722891566265},
{'a': 0.6955475330926595,
'f1': 0.552212389380531,
'p': 0.4073107049608355,
'r': 0.8571428571428571},
{'a': 0.7093649085037675,
'f1': 0.64,
'p': 0.4948453608247423,
'r': 0.9056603773584906},
{'a': 0.7417962003454232,
'f1': 0.6276463262764632,
'p': 0.4931506849315068,
'r': 0.863013698630137},
{'a': 0.7535760728218466,
'f1': 0.7959073774905762,
'p': 0.6880819366852886,
'r': 0.9438058748403576},
{'a': 0.7333333333333333,
'f1': 0.4311111111111111,
'p': 0.29754601226993865,
'r': 0.782258064516129},
{'a': 0.6956140350877194,
'f1': 0.6831050228310502,
'p': 0.5451895043731778,
'r': 0.9144254278728606},
{'a': 0.7415865384615384,
'f1': 0.6814814814814816,
'p': 0.5463182897862233,
'r': 0.905511811023622},
{'a': 0.7833655705996132,
'f1': 0.7815344603381015,
'p': 0.6722595078299777,
'r': 0.9332298136645962},
{'a': 0.7554806070826307,
'f1': 0.7653721682847895,
'p': 0.6515151515151515,
'r': 0.9274509803921569},
{'a': 0.8192926045016077,
'f1': 0.8632603406326035,
'p': 0.7940913160250671,
'r': 0.9456289978678039},
{'a': 0.7952552862300155,
'f1': 0.8496781522150701,
'p': 0.7732598208132323,
'r': 0.9428571428571428},
{'a': 0.6947368421052632,
'f1': 0.5572519083969465,
'p': 0.41714285714285715,
'r': 0.8390804597701149},
{'a': 0.8304477611940299,
'f1': 0.8758741258741259,
'p': 0.8139723801787165,
'r': 0.9479659413434248},
{'a': 0.7150259067357513,
'f1': 0.736842105263158,
'p': 0.6111111111111112,
'r': 0.927710843373494},
{'a': 0.7894356005788712,
'f1': 0.8293255131964808,
'p': 0.7379958246346555,
'r': 0.9464524765729585},
{'a': 0.5150187734668336,
'f1': 0.6475670759436107,
'p': 0.48667122351332875,
'r': 0.967391304347826},
{'a': 0.7634961439588689,
'f1': 0.7722772277227723,
'p': 0.6657183499288762,
'r': 0.9194499017681729},
{'a': 0.6843082636954503,
'f1': 0.6537678207739308,
'p': 0.5152487961476726,
'r': 0.8941504178272981},
{'a': 0.8263041065482797,
'f1': 0.8490110950313555,
'p': 0.775330396475771,
'r': 0.9381663113006397},
{'a': 0.6750756811301716,
'f1': 0.6053921568627451,
'p': 0.4582560296846011,
'r': 0.8916967509025271},
{'a': 0.8098720292504571,
'f1': 0.8215102974828375,
'p': 0.7341513292433538,
'r': 0.9324675324675324}],
'www.latimes.com;2000': [{'a': 0.952054794520548,
'f1': 0.9619753086419753,
'p': 0.973026973026973,
'r': 0.951171875},
{'a': 0.960741548527808,
'f1': 0.9711075441412521,
'p': 0.9781729991915926,
'r': 0.9641434262948207},
{'a': 0.84994640943194,
'f1': 0.7704918032786885,
'p': 0.6811594202898551,
'r': 0.8867924528301887},
{'a': 0.9433198380566802,
'f1': 0.9593810444874274,
'p': 0.9538461538461539,
'r': 0.9649805447470817},
{'a': 0.9700534759358289,
'f1': 0.9808306709265177,
'p': 0.9862322166131253,
'r': 0.9754879709487063},
{'a': 0.9447640966628308,
'f1': 0.9554730983302412,
'p': 0.9716981132075472,
'r': 0.9397810218978102},
{'a': 0.9426751592356688,
'f1': 0.9540972458347502,
'p': 0.9790648988136776,
'r': 0.9303713527851459},
{'a': 0.6607508532423209,
'f1': 0.7291553133514986,
'p': 0.5809813287016934,
'r': 0.9787856620336504},
{'a': 0.9533718689788054,
'f1': 0.9577956051621903,
'p': 0.973758865248227,
'r': 0.942347288949897},
{'a': 0.9153766769865841,
'f1': 0.8900804289544235,
'p': 0.8736842105263158,
'r': 0.907103825136612},
{'a': 0.7662976629766297,
'f1': 0.7543103448275863,
'p': 0.616631430584919,
'r': 0.9711431742508324},
{'a': 0.9705093833780161,
'f1': 0.9814892721918386,
'p': 0.9873042742276766,
'r': 0.9757423672103722},
{'a': 0.9032069970845481,
'f1': 0.9145211122554068,
'p': 0.8996960486322189,
'r': 0.9298429319371728},
{'a': 0.9527659574468085,
'f1': 0.9672662931288706,
'p': 0.9838032393521295,
'r': 0.951276102088167},
{'a': 0.9543307086614173,
'f1': 0.9663962920046352,
'p': 0.9788732394366197,
'r': 0.954233409610984},
{'a': 0.9581294181620446,
'f1': 0.9692123150739704,
'p': 0.9758454106280193,
'r': 0.9626687847498014},
{'a': 0.965527950310559,
'f1': 0.9786907275868688,
'p': 0.9883675843350136,
'r': 0.9692015209125475},
{'a': 0.9492814877430262,
'f1': 0.9597044996642042,
'p': 0.9794379712131597,
'r': 0.9407504937458855},
{'a': 0.9283223556760946,
'f1': 0.9530814100938373,
'p': 0.9413827655310621,
'r': 0.9650744735490498},
{'a': 0.951894423158791,
'f1': 0.9666371420135814,
'p': 0.982003599280144,
'r': 0.9517441860465117},
{'a': 0.9497282608695652,
'f1': 0.9651710072168184,
'p': 0.969735182849937,
'r': 0.9606495940037476},
{'a': 0.9533622559652929,
'f1': 0.9648692810457516,
'p': 0.9736191261335532,
'r': 0.9562753036437247},
{'a': 0.9296527159394479,
'f1': 0.9177939646201873,
'p': 0.9423076923076923,
'r': 0.8945233265720081},
{'a': 0.9509415262636274,
'f1': 0.9649557522123893,
'p': 0.9652974504249292,
'r': 0.9646142958244869},
{'a': 0.9704292527821939,
'f1': 0.98138883329998,
'p': 0.9891085114965712,
'r': 0.9737887212073074}],
'www.latimes.com;2005': [{'a': 0.7275659824046921,
'f1': 0.7194201147689518,
'p': 0.5940149625935162,
'r': 0.9119448698315467},
{'a': 0.5853510895883777,
'f1': 0.4929681717246484,
'p': 0.34049079754601225,
'r': 0.8927613941018767},
{'a': 0.7218893197039126,
'f1': 0.7108831073653351,
'p': 0.5692488262910798,
'r': 0.9463414634146341},
{'a': 0.7391795257809559,
'f1': 0.7072243346007605,
'p': 0.5651586765698852,
'r': 0.9446952595936795},
{'a': 0.7825588066551922,
'f1': 0.723559445660102,
'p': 0.6019417475728155,
'r': 0.906764168190128},
{'a': 0.898930284028034,
'f1': 0.9250136836343732,
'p': 0.9130199891950297,
'r': 0.93732667775929},
{'a': 0.8402457757296466,
'f1': 0.7450980392156862,
'p': 0.6454352441613588,
'r': 0.881159420289855},
{'a': 0.6603614964270702,
'f1': 0.7000742390497401,
'p': 0.5547058823529412,
'r': 0.9486921529175051},
{'a': 0.7958236658932715,
'f1': 0.8233818364274962,
'p': 0.7293333333333333,
'r': 0.9452764976958525},
{'a': 0.752387448840382,
'f1': 0.8058823529411766,
'p': 0.7125295508274232,
'r': 0.9273846153846154},
{'a': 0.72677793904209,
'f1': 0.7404343329886246,
'p': 0.616532721010333,
'r': 0.9266609145815358},
{'a': 0.8592896174863388,
'f1': 0.9007946063086925,
'p': 0.8531356898517674,
'r': 0.9540933435348126},
{'a': 0.7138193688792165,
'f1': 0.684020824989988,
'p': 0.5384615384615384,
'r': 0.9374313940724479},
{'a': 0.6749663526244953,
'f1': 0.6826544021024967,
'p': 0.5383419689119171,
'r': 0.9326750448833034},
{'a': 0.7711995066296639,
'f1': 0.7659305993690851,
'p': 0.6615803814713896,
'r': 0.9093632958801499},
{'a': 0.7778964228510411,
'f1': 0.7329910141206675,
'p': 0.6193058568329718,
'r': 0.8977987421383647},
{'a': 0.7626443647949024,
'f1': 0.8051013734466972,
'p': 0.6923509561304837,
'r': 0.96171875},
{'a': 0.7578361981799798,
'f1': 0.7190615835777127,
'p': 0.6105577689243028,
'r': 0.8744650499286734},
{'a': 0.6359447004608295,
'f1': 0.666947723440135,
'p': 0.5119741100323625,
'r': 0.9564691656590084},
{'a': 0.619484240687679,
'f1': 0.3828996282527881,
'p': 0.25245098039215685,
'r': 0.7923076923076923},
{'a': 0.6725452812202097,
'f1': 0.6942590120160214,
'p': 0.5454545454545454,
'r': 0.9547123623011016},
{'a': 0.8025809994508512,
'f1': 0.8536535721555056,
'p': 0.7755177514792899,
'r': 0.9492983250339521},
{'a': 0.9104768083036003,
'f1': 0.9266737513283742,
'p': 0.9208025343189018,
'r': 0.932620320855615},
{'a': 0.7212666145426114,
'f1': 0.7436174038115786,
'p': 0.6180514046622834,
'r': 0.9332129963898917},
{'a': 0.6097704532077692,
'f1': 0.3556851311953353,
'p': 0.22932330827067668,
'r': 0.7922077922077922}],
'www.latimes.com;2010': [{'a': 0.5011997600479904,
'f1': 0.4609400324149109,
'p': 0.3108876257105378,
'r': 0.8909774436090225},
{'a': 0.4197498697238145,
'f1': 0.3846366399557889,
'p': 0.24455375966268447,
'r': 0.9003880983182406},
{'a': 0.4569489103251161,
'f1': 0.2943361188486536,
'p': 0.18011363636363636,
'r': 0.8045685279187818},
{'a': 0.45047241694605306,
'f1': 0.3599574014909479,
'p': 0.2274562584118439,
'r': 0.8622448979591837},
{'a': 0.8670289855072464,
'f1': 0.8772985623537279,
'p': 0.8388746803069054,
'r': 0.9194113524877365},
{'a': 0.861198738170347,
'f1': 0.8615263571990559,
'p': 0.8141263940520446,
'r': 0.9147869674185464},
{'a': 0.46867167919799496,
'f1': 0.43146603098927294,
'p': 0.2836990595611285,
'r': 0.900497512437811},
{'a': 0.6196874143131341,
'f1': 0.4199079882894187,
'p': 0.28361581920903955,
'r': 0.8083735909822867},
{'a': 0.8493035035880118,
'f1': 0.8385345997286295,
'p': 0.7875955819881053,
'r': 0.8965183752417795},
{'a': 0.8386295928500497,
'f1': 0.7890979883192732,
'p': 0.7111111111111111,
'r': 0.8862973760932945},
{'a': 0.548933984073979,
'f1': 0.13667649950835792,
'p': 0.07591480065537957,
'r': 0.6847290640394089},
{'a': 0.8466467321657412,
'f1': 0.8337193144974524,
'p': 0.78125,
'r': 0.8937437934458788},
{'a': 0.5137440758293839,
'f1': 0.46839378238341967,
'p': 0.31801125703564725,
'r': 0.8885976408912188},
{'a': 0.46932515337423314,
'f1': 0.4602184087363495,
'p': 0.3081476323119777,
'r': 0.9086242299794661},
{'a': 0.6018632017651385,
'f1': 0.39267015706806285,
'p': 0.25862068965517243,
'r': 0.8152173913043478},
{'a': 0.545144804088586,
'f1': 0.26175115207373273,
'p': 0.1561297416162727,
'r': 0.8091168091168092},
{'a': 0.752978276103714,
'f1': 0.5629262244265344,
'p': 0.4188191881918819,
'r': 0.8582230623818525},
{'a': 0.4967412864834231,
'f1': 0.47548730064973416,
'p': 0.32368315239244067,
'r': 0.8954393770856507},
{'a': 0.4060072501294666,
'f1': 0.33507246376811595,
'p': 0.20694593626924454,
'r': 0.8797564687975646},
{'a': 0.45971014492753626,
'f1': 0.40937896070975915,
'p': 0.2661722290894108,
'r': 0.8861454046639232},
{'a': 0.8541341653666147,
'f1': 0.8560431100846805,
'p': 0.8128654970760234,
'r': 0.9040650406504065},
{'a': 0.4808497905445841,
'f1': 0.4203140661543602,
'p': 0.275635407537248,
'r': 0.8846694796061885},
{'a': 0.4926829268292683,
'f1': 0.42647058823529405,
'p': 0.28361858190709044,
'r': 0.8592592592592593},
{'a': 0.8499546690843155,
'f1': 0.8256977356503422,
'p': 0.7604267701260912,
'r': 0.9032258064516129},
{'a': 0.582480433473811,
'f1': 0.19965377957299485,
'p': 0.11564171122994653,
'r': 0.729957805907173}],
'www.latimes.com;2015': [{'a': 0.5950798812385126,
'f1': 0.1722543352601156,
'p': 0.0962532299741602,
'r': 0.8186813186813187},
{'a': 0.612917208495882,
'f1': 0.17796870205584536,
'p': 0.10034602076124567,
'r': 0.7859078590785907},
{'a': 0.5957594235033259,
'f1': 0.1961972995315514,
'p': 0.111145800811739,
'r': 0.8356807511737089},
{'a': 0.31688511950655357,
'f1': 0.35548011639185256,
'p': 0.22025240384615385,
'r': 0.9208542713567839},
{'a': 0.2421244467586566,
'f1': 0.2185234899328859,
'p': 0.12507682851874616,
'r': 0.8641188959660298},
{'a': 0.601453488372093,
'f1': 0.13719320327249843,
'p': 0.07486263736263736,
'r': 0.8195488721804511},
{'a': 0.3392429324389075,
'f1': 0.3962346760070053,
'p': 0.25328855303666387,
'r': 0.9095477386934674},
{'a': 0.6055373198230342,
'f1': 0.18896713615023472,
'p': 0.10655195234943746,
'r': 0.8341968911917098},
{'a': 0.5928799002631944,
'f1': 0.19896429544835104,
'p': 0.11275872721655854,
'r': 0.8449074074074074},
{'a': 0.6061456245824983,
'f1': 0.2947368421052632,
'p': 0.17680826636050517,
'r': 0.8850574712643678},
{'a': 0.5855561861520999,
'f1': 0.12308616031221856,
'p': 0.06677524429967427,
'r': 0.7854406130268199},
{'a': 0.5880266704497091,
'f1': 0.13622843545508626,
'p': 0.0741820537738905,
'r': 0.8327272727272728},
{'a': 0.2408955223880597,
'f1': 0.19397781299524566,
'p': 0.10959885386819485,
'r': 0.8429752066115702},
{'a': 0.5995176620797277,
'f1': 0.175284837861525,
'p': 0.0982961992136304,
'r': 0.8086253369272237},
{'a': 0.6022187004754358,
'f1': 0.15900091379835518,
'p': 0.08817567567567568,
'r': 0.8080495356037152},
{'a': 0.6010422698320788,
'f1': 0.14885731933292157,
'p': 0.0820285908781484,
'r': 0.8033333333333333},
{'a': 0.6078093597473443,
'f1': 0.18496420047732695,
'p': 0.10413167618407793,
'r': 0.8266666666666667},
{'a': 0.6010044642857143,
'f1': 0.20950801547816472,
'p': 0.12016487000634116,
'r': 0.8168103448275862},
{'a': 0.6004129387474191,
'f1': 0.22976916954099233,
'p': 0.1328628413623811,
'r': 0.8490196078431372},
{'a': 0.2534607778510218,
'f1': 0.19480981158905084,
'p': 0.1099959855479727,
'r': 0.8509316770186336},
{'a': 0.6122622764689185,
'f1': 0.2251843448667045,
'p': 0.12944245190740136,
'r': 0.8649237472766884},
{'a': 0.6097852028639618,
'f1': 0.3138260666822103,
'p': 0.19075963718820863,
'r': 0.8843626806833115},
{'a': 0.2766504715633038,
'f1': 0.2712352433054996,
'p': 0.16020408163265307,
'r': 0.8836772983114447},
{'a': 0.2219861922464153,
'f1': 0.1883656509695291,
'p': 0.1059190031152648,
'r': 0.85},
{'a': 0.2403846153846154,
'f1': 0.20813679245283018,
'p': 0.11833724438484747,
'r': 0.863080684596577}],
'www.nymag.com;2000': [{'a': 0.925783397471138,
'f1': 0.9283819628647215,
'p': 0.8965163934426229,
'r': 0.9625962596259626},
{'a': 0.9344170403587444,
'f1': 0.9351081530782029,
'p': 0.9084051724137931,
'r': 0.9634285714285714},
{'a': 0.937906564163217,
'f1': 0.9441786283891548,
'p': 0.9347368421052632,
'r': 0.9538131041890441},
{'a': 0.9248601119104716,
'f1': 0.907843137254902,
'p': 0.8703007518796992,
'r': 0.9487704918032787},
{'a': 0.9275825346112886,
'f1': 0.8316831683168316,
'p': 0.7636363636363637,
'r': 0.9130434782608695},
{'a': 0.9473358116480793,
'f1': 0.9514563106796117,
'p': 0.9444444444444444,
'r': 0.9585730724971231},
{'a': 0.954856912535268,
'f1': 0.9673849737914967,
'p': 0.9690781796966161,
'r': 0.9656976744186047},
{'a': 0.9388619854721549,
'f1': 0.9434173669467788,
'p': 0.9365962180200222,
'r': 0.9503386004514672},
{'a': 0.9327830188679245,
'f1': 0.9394904458598727,
'p': 0.9247648902821317,
'r': 0.9546925566343042},
{'a': 0.5090819833087874,
'f1': 0.424626006904488,
'p': 0.28103579588728106,
'r': 0.8682352941176471},
{'a': 0.9226475279106858,
'f1': 0.9049951028403526,
'p': 0.8619402985074627,
'r': 0.9525773195876288},
{'a': 0.6294383473208521,
'f1': 0.5043177892918825,
'p': 0.3501199040767386,
'r': 0.9012345679012346},
{'a': 0.9383062254627033,
'f1': 0.9467570183930301,
'p': 0.933206106870229,
'r': 0.9607072691552063},
{'a': 0.9437974683544303,
'f1': 0.9548596990646605,
'p': 0.9615069615069615,
'r': 0.9483037156704361},
{'a': 0.9378813089295619,
'f1': 0.9463087248322147,
'p': 0.9302544769085768,
'r': 0.9629268292682926},
{'a': 0.9467640918580376,
'f1': 0.9561855670103093,
'p': 0.9392405063291139,
'r': 0.973753280839895},
{'a': 0.6232948583420777,
'f1': 0.5844907407407408,
'p': 0.42616033755274263,
'r': 0.9300184162062615},
{'a': 0.921644685802948,
'f1': 0.8745341614906831,
'p': 0.8421052631578947,
'r': 0.9095607235142119},
{'a': 0.9337785197551475,
'f1': 0.9339988907376595,
'p': 0.9252747252747253,
'r': 0.9428891377379619},
{'a': 0.9332849891225525,
'f1': 0.9138576779026217,
'p': 0.8792792792792793,
'r': 0.9512670565302144},
{'a': 0.9295392953929539,
'f1': 0.9280774550484093,
'p': 0.8994638069705094,
'r': 0.9585714285714285},
{'a': 0.9257278669043375,
'f1': 0.9226963512677798,
'p': 0.8902147971360382,
'r': 0.9576379974326059},
{'a': 0.9289575289575289,
'f1': 0.9162112932604737,
'p': 0.8871252204585538,
'r': 0.9472693032015066},
{'a': 0.9108678655199375,
'f1': 0.8564231738035265,
'p': 0.815347721822542,
'r': 0.9018567639257294},
{'a': 0.9315263908701854,
'f1': 0.9280359820089956,
'p': 0.9225037257824144,
'r': 0.9336349924585219}],
'www.nymag.com;2005': [{'a': 0.6613294517224648,
'f1': 0.6919682259488085,
'p': 0.5403170227429359,
'r': 0.9619631901840491},
{'a': 0.665680473372781,
'f1': 0.6986666666666668,
'p': 0.5500349895031491,
'r': 0.9573690621193667},
{'a': 0.6496783770410688,
'f1': 0.6758241758241758,
'p': 0.5208186309103741,
'r': 0.9621903520208605},
{'a': 0.6781731279872544,
'f1': 0.6988071570576541,
'p': 0.5466562986003111,
'r': 0.9683195592286501},
{'a': 0.6708927628103539,
'f1': 0.690818858560794,
'p': 0.5403726708074534,
'r': 0.9573590096286108},
{'a': 0.6703910614525139,
'f1': 0.5406006674082313,
'p': 0.39067524115755625,
'r': 0.8772563176895307},
{'a': 0.6880984952120383,
'f1': 0.645412130637636,
'p': 0.4964114832535885,
'r': 0.9222222222222223},
{'a': 0.6727456940222898,
'f1': 0.7063636363636364,
'p': 0.561822125813449,
'r': 0.9510403916768666},
{'a': 0.7342368886269888,
'f1': 0.7464867903316469,
'p': 0.6240601503759399,
'r': 0.9286713286713286},
{'a': 0.7742616033755274,
'f1': 0.8087578194816801,
'p': 0.7020946470131885,
'r': 0.9536354056902002},
{'a': 0.754982984929509,
'f1': 0.7917355371900826,
'p': 0.6808813077469794,
'r': 0.945705824284304},
{'a': 0.7540574282147315,
'f1': 0.811000959385993,
'p': 0.698237885462555,
'r': 0.9672006102212052},
{'a': 0.6411800120409392,
'f1': 0.5945578231292518,
'p': 0.44275582573454914,
'r': 0.9047619047619048},
{'a': 0.6382235528942116,
'f1': 0.6687985381452719,
'p': 0.5184135977337111,
'r': 0.9420849420849421},
{'a': 0.666501976284585,
'f1': 0.7038174637999123,
'p': 0.5596650383810189,
'r': 0.9479905437352246},
{'a': 0.6983471074380165,
'f1': 0.7477927063339731,
'p': 0.6152874289324068,
'r': 0.9530332681017613},
{'a': 0.7774750227066304,
'f1': 0.825,
'p': 0.7205240174672489,
'r': 0.9649122807017544},
{'a': 0.6593951412989588,
'f1': 0.690401081568274,
'p': 0.5364145658263305,
'r': 0.9683944374209861},
{'a': 0.6951392166116093,
'f1': 0.7424242424242424,
'p': 0.6033700583279326,
'r': 0.9647668393782384},
{'a': 0.6561546286876907,
'f1': 0.6838166510757717,
'p': 0.528179190751445,
'r': 0.9694960212201591},
{'a': 0.7003959524857017,
'f1': 0.7520931925737167,
'p': 0.6126927639383155,
'r': 0.9736098020735156},
{'a': 0.6898638426626323,
'f1': 0.7220967013104382,
'p': 0.5777295733911786,
'r': 0.9626506024096385},
{'a': 0.6380900705371677,
'f1': 0.6538661131292164,
'p': 0.49960348929421095,
'r': 0.9459459459459459},
{'a': 0.6635071090047393,
'f1': 0.540948275862069,
'p': 0.3909657320872274,
'r': 0.8776223776223776},
{'a': 0.6389038151531434,
'f1': 0.6560900716479018,
'p': 0.5035349567949725,
'r': 0.9412628487518355},
{'a': 0.771545130035696,
'f1': 0.7952468007312615,
'p': 0.6965572457966374,
'r': 0.9265175718849841}],
'www.nymag.com;2010': [{'a': 0.38664596273291924,
'f1': 0.17947652679684253,
'p': 0.10032512772875057,
'r': 0.8503937007874016},
{'a': 0.36547152756854473,
'f1': 0.300796812749004,
'p': 0.180622009569378,
'r': 0.8988095238095238},
{'a': 0.26850315300365085,
'f1': 0.06610169491525424,
'p': 0.0344675209898365,
'r': 0.8041237113402062},
{'a': 0.2766169154228856,
'f1': 0.08782936010037641,
'p': 0.04633715798764342,
'r': 0.84},
{'a': 0.32779021426287175,
'f1': 0.2043906131718395,
'p': 0.11642949547218628,
'r': 0.8359133126934984},
{'a': 0.29359260439910745,
'f1': 0.14506172839506173,
'p': 0.07922461019806153,
'r': 0.8584474885844748},
{'a': 0.29476584022038566,
'f1': 0.09540636042402825,
'p': 0.05060918462980318,
'r': 0.8307692307692308},
{'a': 0.3440993788819876,
'f1': 0.251063829787234,
'p': 0.14640198511166252,
'r': 0.8805970149253731},
{'a': 0.28874130297280204,
'f1': 0.13732259301879554,
'p': 0.07467667918231122,
'r': 0.8523809523809524},
{'a': 0.28453214513049013,
'f1': 0.13204633204633207,
'p': 0.07157806613645877,
'r': 0.8507462686567164},
{'a': 0.5196145905024088,
'f1': 0.5415936952714537,
'p': 0.3813193588162762,
'r': 0.9342900302114804},
{'a': 0.2915881983678594,
'f1': 0.15309568480300187,
'p': 0.08408903544929926,
'r': 0.8535564853556485},
{'a': 0.2801883619239825,
'f1': 0.08858603066439522,
'p': 0.0466995958688819,
'r': 0.859504132231405},
{'a': 0.26184951939012263,
'f1': 0.05755395683453237,
'p': 0.02989010989010989,
'r': 0.7727272727272727},
{'a': 0.2967103161929096,
'f1': 0.1550268610897928,
'p': 0.08501683501683502,
'r': 0.8782608695652174},
{'a': 0.31398416886543534,
'f1': 0.15309446254071662,
'p': 0.0842671447781264,
'r': 0.8355555555555556},
{'a': 0.3157720344599072,
'f1': 0.1643059490084986,
'p': 0.09094982078853046,
'r': 0.8493723849372385},
{'a': 0.32776886035313,
'f1': 0.20501138952164008,
'p': 0.11617900172117039,
'r': 0.8709677419354839},
{'a': 0.4830122591943958,
'f1': 0.4577516531961793,
'p': 0.30689655172413793,
'r': 0.9002890173410405},
{'a': 0.3631793905120955,
'f1': 0.11368605159597725,
'p': 0.06143667296786389,
'r': 0.7602339181286549},
{'a': 0.36385911179173047,
'f1': 0.29377762665759943,
'p': 0.1761827079934747,
'r': 0.8834355828220859},
{'a': 0.29823455233291296,
'f1': 0.1625282167042889,
'p': 0.08973826339842127,
'r': 0.8605577689243028},
{'a': 0.315648982315649,
'f1': 0.16114519427402862,
'p': 0.08877872915727805,
'r': 0.8716814159292036},
{'a': 0.2774643212744773,
'f1': 0.09253855773238849,
'p': 0.048963387737097484,
'r': 0.8409090909090909},
{'a': 0.5207010414020828,
'f1': 0.5855479903360421,
'p': 0.42656,
'r': 0.9334733893557423}],
'www.nymag.com;2015': [{'a': 0.11080501989592899,
'f1': 0.004650024473813021,
'p': 0.002331574426309977,
'r': 0.8260869565217391},
{'a': 0.11209059765650571,
'f1': 0.007705813499804916,
'p': 0.003869419342198712,
'r': 0.9028571428571428},
{'a': 0.11870604781997188,
'f1': 0.024618626817060608,
'p': 0.012482999805712065,
'r': 0.8846815834767642},
{'a': 0.16200859115871893,
'f1': 0.05840843134259966,
'p': 0.03019869223236567,
'r': 0.8868274582560297},
{'a': 0.2533825649326016,
'f1': 0.2545702454297546,
'p': 0.1474364598871048,
'r': 0.9312765564382043},
{'a': 0.17140029952931107,
'f1': 0.0838040039033622,
'p': 0.04393389762192664,
'r': 0.9060102301790282},
{'a': 0.11113056035708815,
'f1': 0.005288932419196866,
'p': 0.0026523244676932146,
'r': 0.8925619834710744},
{'a': 0.11253879712629956,
'f1': 0.010790148545991194,
'p': 0.0054288287849648224,
'r': 0.867704280155642},
{'a': 0.11240250802875057,
'f1': 0.008444379362522576,
'p': 0.0042417555473826165,
'r': 0.9153439153439153},
{'a': 0.1114923187867398,
'f1': 0.006451139945751778,
'p': 0.0032375953496357707,
'r': 0.868421052631579},
{'a': 0.11615075486043228,
'f1': 0.017957134581965628,
'p': 0.009072506889739776,
'r': 0.8671328671328671},
{'a': 0.14342384623980334,
'f1': 0.00898513251454428,
'p': 0.004517680707228289,
'r': 0.8081395348837209},
{'a': 0.11244113029827316,
'f1': 0.008717886277852187,
'p': 0.004380919748403045,
'r': 0.8689320388349514},
{'a': 0.12663672890569605,
'f1': 0.020308605970624102,
'p': 0.010269473119721142,
'r': 0.9054373522458629},
{'a': 0.2906214339593447,
'f1': 0.34723334579173115,
'p': 0.21201451826737083,
'r': 0.9586281446540881},
{'a': 0.11254012271546171,
'f1': 0.00931139549055454,
'p': 0.004681257812308522,
'r': 0.8526785714285714},
{'a': 0.11154812629738883,
'f1': 0.006256721087105289,
'p': 0.0031401025439737017,
'r': 0.8366013071895425},
{'a': 0.636987923277291,
'f1': 0.7536557930258718,
'p': 0.6140350877192983,
'r': 0.9754575707154742},
{'a': 0.11290674343540086,
'f1': 0.010172048767429975,
'p': 0.005116905373974782,
'r': 0.842741935483871},
{'a': 0.11335521881861861,
'f1': 0.012255080045532708,
'p': 0.006171032733304063,
'r': 0.8694158075601375},
{'a': 0.14546769393374684,
'f1': 0.014688432057984027,
'p': 0.007412442545478086,
'r': 0.7979094076655052},
{'a': 0.1513119774853075,
'f1': 0.03099896040071827,
'p': 0.01577479239475456,
'r': 0.8880866425992779},
{'a': 0.1109951514360881,
'f1': 0.006463527239150508,
'p': 0.0032435068894037314,
'r': 0.8926174496644296},
{'a': 0.11035750229729138,
'f1': 0.0034312043527278072,
'p': 0.0017189303342091693,
'r': 0.8860759493670886},
{'a': 0.111470749043193,
'f1': 0.006261618236963114,
'p': 0.003142029554715499,
'r': 0.8767123287671232},
{'a': 0.1487857618097139,
'f1': 0.023719436583892403,
'p': 0.012017526902506605,
'r': 0.9031476997578692}]},
{'entertainment.msn.com;2000': [{'a': 0.8826574633304572,
'f1': 0.71900826446281,
'p': 0.6170212765957447,
'r': 0.8613861386138614},
{'a': 0.8324066719618745,
'f1': 0.6955266955266955,
'p': 0.5950617283950618,
'r': 0.8368055555555556},
{'a': 0.8578982154659617,
'f1': 0.8281374900079937,
'p': 0.7485549132947977,
'r': 0.9266547406082289},
{'a': 0.8097199341021417,
'f1': 0.6526315789473685,
'p': 0.5216346153846154,
'r': 0.8714859437751004},
{'a': 0.8768197088465846,
'f1': 0.7808764940239042,
'p': 0.6853146853146853,
'r': 0.9074074074074074},
{'a': 0.8791469194312796,
'f1': 0.7759882869692533,
'p': 0.6847545219638242,
'r': 0.8952702702702703},
{'a': 0.8872445384073291,
'f1': 0.8666666666666666,
'p': 0.8201892744479495,
'r': 0.9187279151943463},
{'a': 0.7917737789203085,
'f1': 0.6048780487804878,
'p': 0.45925925925925926,
'r': 0.8857142857142857},
{'a': 0.8326724821570183,
'f1': 0.7398273736128236,
'p': 0.635593220338983,
'r': 0.8849557522123894},
{'a': 0.8015803336259877,
'f1': 0.562015503875969,
'p': 0.4264705882352941,
'r': 0.8238636363636364},
{'a': 0.8655876143560872,
'f1': 0.8398994132439229,
'p': 0.7791601866251944,
'r': 0.9109090909090909},
{'a': 0.9019097222222222,
'f1': 0.8300751879699249,
'p': 0.7603305785123967,
'r': 0.9139072847682119},
{'a': 0.9068193649141072,
'f1': 0.9089058524173028,
'p': 0.9056795131845842,
'r': 0.9121552604698672},
{'a': 0.8921859545004945,
'f1': 0.4630541871921182,
'p': 0.3219178082191781,
'r': 0.8245614035087719},
{'a': 0.857813547954393,
'f1': 0.7805383022774327,
'p': 0.6968576709796673,
'r': 0.8870588235294118},
{'a': 0.8879568603954464,
'f1': 0.8440366972477065,
'p': 0.790625,
'r': 0.9051878354203936},
{'a': 0.8733624454148472,
'f1': 0.778061224489796,
'p': 0.6823266219239373,
'r': 0.9050445103857567},
{'a': 0.8296751536435469,
'f1': 0.6655172413793103,
'p': 0.5514285714285714,
'r': 0.8391304347826087},
{'a': 0.8378127896200185,
'f1': 0.6203904555314534,
'p': 0.49480968858131485,
'r': 0.8313953488372093},
{'a': 0.7847809377401999,
'f1': 0.6698113207547169,
'p': 0.5358490566037736,
'r': 0.8930817610062893},
{'a': 0.8850308641975309,
'f1': 0.8471794871794872,
'p': 0.7866666666666666,
'r': 0.9177777777777778},
{'a': 0.8874364560639071,
'f1': 0.8648648648648649,
'p': 0.7948717948717948,
'r': 0.9483747609942639},
{'a': 0.8893905191873589,
'f1': 0.8209500609013399,
'p': 0.7505567928730512,
'r': 0.9059139784946236},
{'a': 0.8669086509376891,
'f1': 0.8294573643410853,
'p': 0.7599431818181818,
'r': 0.9129692832764505},
{'a': 0.9388830347734457,
'f1': 0.9512331838565022,
'p': 0.9454038997214484,
'r': 0.9571347997743936}],
'entertainment.msn.com;2005': [{'a': 0.670605612998523,
'f1': 0.6795977011494252,
'p': 0.5368898978433598,
'r': 0.9256360078277887},
{'a': 0.509469696969697,
'f1': 0.2127659574468085,
'p': 0.12367491166077739,
'r': 0.7608695652173914},
{'a': 0.41793103448275865,
'f1': 0.22140221402214022,
'p': 0.12875536480686695,
'r': 0.7894736842105263},
{'a': 0.6264980026631158,
'f1': 0.5427872860635697,
'p': 0.39501779359430605,
'r': 0.8671875},
{'a': 0.6695217701641685,
'f1': 0.6769016050244243,
'p': 0.5498866213151927,
'r': 0.8802177858439202},
{'a': 0.6118508655126498,
'f1': 0.5697416974169742,
'p': 0.4182015167930661,
'r': 0.8935185185185185},
{'a': 0.5877976190476191,
'f1': 0.561014263074485,
'p': 0.4092485549132948,
'r': 0.8916876574307305},
{'a': 0.6855563234277816,
'f1': 0.715090795241077,
'p': 0.571,
'r': 0.9564489112227805},
{'a': 0.6617954070981211,
'f1': 0.5874363327674024,
'p': 0.4385297845373891,
'r': 0.8894601542416453},
{'a': 0.7581007378889958,
'f1': 0.8292572463768116,
'p': 0.7728999577880963,
'r': 0.8944797264289204},
{'a': 0.7184466019417476,
'f1': 0.8004914004914004,
'p': 0.7230359520639148,
'r': 0.8965327462850853},
{'a': 0.6350267379679144,
'f1': 0.6330645161290323,
'p': 0.4767206477732793,
'r': 0.942},
{'a': 0.6236786469344608,
'f1': 0.625,
'p': 0.4744136460554371,
'r': 0.9156378600823045},
{'a': 0.7021180712032447,
'f1': 0.7549128661475714,
'p': 0.6529826812059012,
'r': 0.8945518453427065},
{'a': 0.5229007633587787,
'f1': 0.40076701821668265,
'p': 0.26125,
'r': 0.8600823045267489},
{'a': 0.570884871550904,
'f1': 0.3377386196769457,
'p': 0.21256931608133087,
'r': 0.8214285714285714},
{'a': 0.6684532924961715,
'f1': 0.672713529856387,
'p': 0.5210772833723654,
'r': 0.9488272921108742},
{'a': 0.47289866457187746,
'f1': 0.26344676180021953,
'p': 0.15604681404421328,
'r': 0.8450704225352113},
{'a': 0.6213753106876554,
'f1': 0.5692742695570218,
'p': 0.41944444444444445,
'r': 0.8856304985337243},
{'a': 0.6492595479345284,
'f1': 0.6299342105263157,
'p': 0.482367758186398,
'r': 0.9075829383886256},
{'a': 0.5830838323353293,
'f1': 0.34547591069330197,
'p': 0.22072072072072071,
'r': 0.7945945945945946},
{'a': 0.5624538063562454,
'f1': 0.5074875207986689,
'p': 0.3600944510035419,
'r': 0.8591549295774648},
{'a': 0.6191726854891662,
'f1': 0.536,
'p': 0.38461538461538464,
'r': 0.8839050131926122},
{'a': 0.6817647058823529,
'f1': 0.7019283746556474,
'p': 0.5563318777292576,
'r': 0.9507462686567164},
{'a': 0.6039009752438109,
'f1': 0.5855572998430141,
'p': 0.43071593533487296,
'r': 0.9142156862745098},
{'a': 0.44510385756676557,
'f1': 0.2240663900414938,
'p': 0.12980769230769232,
'r': 0.8181818181818182},
{'a': 0.6509884117246081,
'f1': 0.6497948016415869,
'p': 0.5005268703898841,
'r': 0.9259259259259259}],
'entertainment.msn.com;2010': [{'a': 0.31368631368631367,
'f1': 0.16253555465258027,
'p': 0.08944543828264759,
'r': 0.8888888888888888},
{'a': 0.42234600262123195,
'f1': 0.4013582342954159,
'p': 0.2600087989441267,
'r': 0.8794642857142857},
{'a': 0.5831491712707182,
'f1': 0.5165011214354374,
'p': 0.36487098234495247,
'r': 0.8837719298245614},
{'a': 0.7518530454398968,
'f1': 0.6949286846275753,
'p': 0.5519194461925739,
'r': 0.9379679144385027},
{'a': 0.30541237113402064,
'f1': 0.2892307692307693,
'p': 0.17486048365665693,
'r': 0.8360864040660737},
{'a': 0.4846876276031033,
'f1': 0.2894144144144144,
'p': 0.17306397306397306,
'r': 0.8831615120274914},
{'a': 0.13659695817490494,
'f1': 0.10503497881564687,
'p': 0.055958005249343835,
'r': 0.8541666666666666},
{'a': 0.5793167992512869,
'f1': 0.5240868184224458,
'p': 0.3694029850746269,
'r': 0.9016393442622951},
{'a': 0.6884253028263796,
'f1': 0.5832583258325833,
'p': 0.43142476697736354,
'r': 0.9},
{'a': 0.7321048901488306,
'f1': 0.6330097087378641,
'p': 0.4858420268256334,
'r': 0.9080779944289693},
{'a': 0.6058128973660308,
'f1': 0.5753424657534247,
'p': 0.421505376344086,
'r': 0.9060092449922958},
{'a': 0.33385826771653543,
'f1': 0.23563426093241774,
'p': 0.13623067279565398,
'r': 0.8716577540106952},
{'a': 0.1712878984192796,
'f1': 0.18625954198473282,
'p': 0.1035458317615994,
'r': 0.9258010118043845},
{'a': 0.37289915966386555,
'f1': 0.29293328069482827,
'p': 0.17922705314009663,
'r': 0.8012958963282938},
{'a': 0.5991921005385996,
'f1': 0.5418163160595177,
'p': 0.38823529411764707,
'r': 0.8964346349745331},
{'a': 0.4107415318889228,
'f1': 0.4030911901081917,
'p': 0.2594508555511341,
'r': 0.9030470914127424},
{'a': 0.7280501710376283,
'f1': 0.7841628959276018,
'p': 0.6634762633996937,
'r': 0.9585176991150443},
{'a': 0.1322271763759959,
'f1': 0.1340627279358133,
'p': 0.07223139196730331,
'r': 0.9311043566362716},
{'a': 0.592958616429895,
'f1': 0.4193832599118943,
'p': 0.2809917355371901,
'r': 0.8263888888888888},
{'a': 0.5938538205980066,
'f1': 0.5834752981260647,
'p': 0.4338188727042432,
'r': 0.8907672301690507},
{'a': 0.6850828729281768,
'f1': 0.740350035962599,
'p': 0.6188376753507014,
'r': 0.9212410501193318},
{'a': 0.44430538172715894,
'f1': 0.45151327980234707,
'p': 0.30008210180623973,
'r': 0.9114713216957606},
{'a': 0.635061919504644,
'f1': 0.6552102376599633,
'p': 0.5096700796359499,
'r': 0.917093142272262},
{'a': 0.36031015265325905,
'f1': 0.30998431782540514,
'p': 0.1877176321620766,
'r': 0.889055472263868},
{'a': 0.5887392900856793,
'f1': 0.5703324808184144,
'p': 0.4207547169811321,
'r': 0.8849206349206349}],
'entertainment.msn.com;2015': [{'a': 0.5808388941849381,
'f1': 0.29478748997594223,
'p': 0.17761886354851178,
'r': 0.8661639962299718},
{'a': 0.5505664263645726,
'f1': 0.0843474611833823,
'p': 0.04441007512152011,
'r': 0.8375},
{'a': 0.5677706376668314,
'f1': 0.20739666424945613,
'p': 0.11796246648793565,
'r': 0.8575712143928036},
{'a': 0.5486353375743895,
'f1': 0.08220321301898603,
'p': 0.04327768014059754,
'r': 0.8174273858921162},
{'a': 0.549887548558577,
'f1': 0.08746113989637305,
'p': 0.04628207940337793,
'r': 0.793233082706767},
{'a': 0.5456209948123284,
'f1': 0.0874361593462717,
'p': 0.04618040569702201,
'r': 0.8199233716475096},
{'a': 0.4804422480944803,
'f1': 0.1699451358222936,
'p': 0.09386548410938655,
'r': 0.8968926553672316},
{'a': 0.5481046559538525,
'f1': 0.06719115458218158,
'p': 0.03506435863293387,
'r': 0.8020304568527918},
{'a': 0.543599615014437,
'f1': 0.11198501872659175,
'p': 0.05995588530178464,
'r': 0.8470254957507082},
{'a': 0.5436064240699329,
'f1': 0.07422680412371134,
'p': 0.03892733564013841,
'r': 0.7964601769911505},
{'a': 0.4519289650949173,
'f1': 0.08016443987667009,
'p': 0.041941926563220155,
'r': 0.9039735099337748},
{'a': 0.5528719301806373,
'f1': 0.1188,
'p': 0.06392595781317262,
'r': 0.8389830508474576},
{'a': 0.5419456478928713,
'f1': 0.1575516117348787,
'p': 0.0864123957091776,
'r': 0.8913934426229508},
{'a': 0.5308917816203613,
'f1': 0.11117246456837843,
'p': 0.059613106987761545,
'r': 0.8228882833787466},
{'a': 0.4994415487714073,
'f1': 0.048141592920353984,
'p': 0.02477231329690346,
'r': 0.85},
{'a': 0.5155402963498373,
'f1': 0.18535399574597383,
'p': 0.10388283378746593,
'r': 0.8591549295774648},
{'a': 0.5493503772003353,
'f1': 0.04908246738890118,
'p': 0.02531934306569343,
'r': 0.7985611510791367},
{'a': 0.4887567366660472,
'f1': 0.1332703213610586,
'p': 0.07184103260869565,
'r': 0.9195652173913044},
{'a': 0.5071130500758725,
'f1': 0.09063867016622922,
'p': 0.04780361757105943,
'r': 0.8720538720538721},
{'a': 0.49243478260869566,
'f1': 0.1641128454818846,
'p': 0.09070761437391167,
'r': 0.8603603603603603},
{'a': 0.5690244780417567,
'f1': 0.32880168185003505,
'p': 0.20276577355229042,
'r': 0.8688888888888889},
{'a': 0.5555978674790556,
'f1': 0.1726338177951081,
'p': 0.0960362847564583,
'r': 0.8528896672504378},
{'a': 0.542801754922967,
'f1': 0.05603539077311987,
'p': 0.029192273924495173,
'r': 0.6963350785340314},
{'a': 0.5430300543645502,
'f1': 0.05794036794248256,
'p': 0.03010327400571303,
'r': 0.7696629213483146},
{'a': 0.5596120935539076,
'f1': 0.18822292323869613,
'p': 0.1057711246799291,
'r': 0.8537360890302067}],
'news.bbc.co.uk;2000': [{'a': 0.7449612403100775,
'f1': 0.6673407482305359,
'p': 0.5238095238095238,
'r': 0.9192200557103064},
{'a': 0.7365101611772951,
'f1': 0.656307129798903,
'p': 0.5248538011695907,
'r': 0.875609756097561},
{'a': 0.8245315161839863,
'f1': 0.8346709470304976,
'p': 0.7572815533980582,
'r': 0.929678188319428},
{'a': 0.8386012104909213,
'f1': 0.8278335724533716,
'p': 0.7474093264248705,
'r': 0.927652733118971},
{'a': 0.8293723339427178,
'f1': 0.829059829059829,
'p': 0.7437020810514786,
'r': 0.9365517241379311},
{'a': 0.7433188315724052,
'f1': 0.7081272084805654,
'p': 0.5765247410817032,
'r': 0.9175824175824175},
{'a': 0.7884615384615384,
'f1': 0.7144340602284527,
'p': 0.6067019400352733,
'r': 0.8686868686868687},
{'a': 0.756838905775076,
'f1': 0.7044334975369458,
'p': 0.5685884691848907,
'r': 0.9255663430420712},
{'a': 0.7388167388167388,
'f1': 0.6379999999999999,
'p': 0.5023622047244094,
'r': 0.873972602739726},
{'a': 0.8223965763195435,
'f1': 0.8056206088992974,
'p': 0.712707182320442,
'r': 0.926391382405745},
{'a': 0.7558651026392962,
'f1': 0.6813397129186602,
'p': 0.5443425076452599,
'r': 0.9104859335038363},
{'a': 0.7374331550802139,
'f1': 0.7372926698769396,
'p': 0.6146297948260482,
'r': 0.9211229946524064},
{'a': 0.7718332022029898,
'f1': 0.6580188679245282,
'p': 0.5284090909090909,
'r': 0.871875},
{'a': 0.7395459976105138,
'f1': 0.7240506329113925,
'p': 0.5970772442588727,
'r': 0.9196141479099679},
{'a': 0.7801879971077368,
'f1': 0.7271095152603232,
'p': 0.6062874251497006,
'r': 0.9080717488789237},
{'a': 0.7600258732212161,
'f1': 0.7329013678905687,
'p': 0.6088516746411483,
'r': 0.9204339963833634},
{'a': 0.7575957727873184,
'f1': 0.7112509834775768,
'p': 0.587012987012987,
'r': 0.9021956087824351},
{'a': 0.7924311926605505,
'f1': 0.786556603773585,
'p': 0.69916142557652,
'r': 0.898921832884097},
{'a': 0.8004326663061114,
'f1': 0.8102827763496144,
'p': 0.719634703196347,
'r': 0.9270588235294117},
{'a': 0.7306451612903225,
'f1': 0.7293354943273905,
'p': 0.6092057761732852,
'r': 0.9084791386271871},
{'a': 0.7519083969465649,
'f1': 0.7357723577235773,
'p': 0.6107986501687289,
'r': 0.9250425894378195},
{'a': 0.7746144721233689,
'f1': 0.767156862745098,
'p': 0.6568730325288562,
'r': 0.9219440353460973},
{'a': 0.8045016077170418,
'f1': 0.7940379403794038,
'p': 0.7009569377990431,
'r': 0.915625},
{'a': 0.7654145995747696,
'f1': 0.7083700440528635,
'p': 0.5759312320916905,
'r': 0.919908466819222},
{'a': 0.7517822423849644,
'f1': 0.7113790504898266,
'p': 0.5856079404466501,
'r': 0.9059500959692899}],
'news.bbc.co.uk;2005': [{'a': 0.7882797731568998,
'f1': 0.6606060606060606,
'p': 0.5202863961813843,
'r': 0.9045643153526971},
{'a': 0.7490071485305798,
'f1': 0.6631130063965884,
'p': 0.5106732348111659,
'r': 0.9452887537993921},
{'a': 0.7287535410764873,
'f1': 0.7173431734317344,
'p': 0.5926829268292683,
'r': 0.908411214953271},
{'a': 0.7966231772831927,
'f1': 0.7601809954751131,
'p': 0.6491499227202473,
'r': 0.9170305676855895},
{'a': 0.8022483392948391,
'f1': 0.8212471131639724,
'p': 0.7257142857142858,
'r': 0.9457446808510638},
{'a': 0.7868538608806637,
'f1': 0.7888748419721872,
'p': 0.6849615806805708,
'r': 0.9299552906110283},
{'a': 0.8698795180722891,
'f1': 0.8919135308246597,
'p': 0.840754716981132,
'r': 0.9497016197783461},
{'a': 0.7557142857142857,
'f1': 0.7135678391959799,
'p': 0.5811732605729877,
'r': 0.9240780911062907},
{'a': 0.7448559670781894,
'f1': 0.6370023419203747,
'p': 0.4927536231884058,
'r': 0.9006622516556292},
{'a': 0.7782144862436833,
'f1': 0.7840349917987972,
'p': 0.6815589353612167,
'r': 0.9227799227799228},
{'a': 0.7784131522516083,
'f1': 0.7491909385113269,
'p': 0.6395027624309392,
'r': 0.904296875},
{'a': 0.7469135802469136,
'f1': 0.6706827309236947,
'p': 0.5344,
'r': 0.9002695417789758},
{'a': 0.7769066286528866,
'f1': 0.73541842772612,
'p': 0.6304347826086957,
'r': 0.8823529411764706},
{'a': 0.7711404189294027,
'f1': 0.7052947052947053,
'p': 0.5873544093178037,
'r': 0.8825},
{'a': 0.7408707865168539,
'f1': 0.6441658630665381,
'p': 0.5022556390977444,
'r': 0.8978494623655914},
{'a': 0.7555012224938875,
'f1': 0.6644295302013423,
'p': 0.5201401050788091,
'r': 0.9195046439628483},
{'a': 0.7300564061240935,
'f1': 0.6223224351747463,
'p': 0.4709897610921502,
'r': 0.9169435215946844},
{'a': 0.7878787878787878,
'f1': 0.7928032501450959,
'p': 0.6878147029204431,
'r': 0.9356164383561644},
{'a': 0.6328413284132841,
'f1': 0.4936386768447838,
'p': 0.3419506462984724,
'r': 0.8871951219512195},
{'a': 0.7984988452655889,
'f1': 0.8134687332977018,
'p': 0.716572504708098,
'r': 0.9406674907292955},
{'a': 0.8120243531202436,
'f1': 0.8522727272727272,
'p': 0.7881637168141593,
'r': 0.927734375},
{'a': 0.757001647446458,
'f1': 0.607190412782956,
'p': 0.4596774193548387,
'r': 0.8941176470588236},
{'a': 0.8047619047619048,
'f1': 0.8127853881278538,
'p': 0.7145405887600357,
'r': 0.9423529411764706},
{'a': 0.6646771568095496,
'f1': 0.6213235294117647,
'p': 0.46728110599078343,
'r': 0.926873857404022},
{'a': 0.7598097502972652,
'f1': 0.7449494949494949,
'p': 0.6203995793901157,
'r': 0.9320695102685624}],
'news.bbc.co.uk;2010': [{'a': 0.6634390651085141,
'f1': 0.6355748373101953,
'p': 0.4994318181818182,
'r': 0.8737574552683897},
{'a': 0.6181592039800995,
'f1': 0.4648460197559558,
'p': 0.3184713375796178,
'r': 0.8602150537634409},
{'a': 0.5606661379857256,
'f1': 0.3550640279394645,
'p': 0.22744220730797912,
'r': 0.8090185676392573},
{'a': 0.6077568972411036,
'f1': 0.4583103257868581,
'p': 0.3132075471698113,
'r': 0.8539094650205762},
{'a': 0.5787979176526266,
'f1': 0.222027972027972,
'p': 0.12893401015228426,
'r': 0.7987421383647799},
{'a': 0.6093241778099024,
'f1': 0.5061671996345363,
'p': 0.36091205211726385,
'r': 0.8470948012232415},
{'a': 0.5724852071005917,
'f1': 0.15249266862170088,
'p': 0.08427876823338736,
'r': 0.8},
{'a': 0.6123142250530785,
'f1': 0.37933378653976885,
'p': 0.246684350132626,
'r': 0.8205882352941176},
{'a': 0.5842696629213483,
'f1': 0.3356353591160221,
'p': 0.21075455333911536,
'r': 0.823728813559322},
{'a': 0.6760172626387176,
'f1': 0.6664550936210727,
'p': 0.5368098159509203,
'r': 0.8786610878661087},
{'a': 0.5968468468468469,
'f1': 0.34336023477622896,
'p': 0.2174721189591078,
'r': 0.8153310104529616},
{'a': 0.6375916634504053,
'f1': 0.540381791483113,
'p': 0.3968368080517613,
'r': 0.8466257668711656},
{'a': 0.5827702702702703,
'f1': 0.35927367055771725,
'p': 0.23121869782971619,
'r': 0.8052325581395349},
{'a': 0.5996518711923412,
'f1': 0.37584803256445043,
'p': 0.2444836716681377,
'r': 0.8123167155425219},
{'a': 0.5774891774891775,
'f1': 0.23028391167192425,
'p': 0.13129496402877697,
'r': 0.9358974358974359},
{'a': 0.6293396868618107,
'f1': 0.5687128712871288,
'p': 0.42359882005899707,
'r': 0.8650602409638555},
{'a': 0.6198347107438017,
'f1': 0.4339378238341968,
'p': 0.2920662598081953,
'r': 0.8438287153652393},
{'a': 0.5741592920353983,
'f1': 0.45043398812243035,
'p': 0.304320987654321,
'r': 0.8664323374340949},
{'a': 0.596656976744186,
'f1': 0.488479262672811,
'p': 0.34460338101430427,
'r': 0.8386075949367089},
{'a': 0.6525017135023989,
'f1': 0.6182228915662651,
'p': 0.47732558139534886,
'r': 0.8771367521367521},
{'a': 0.6787581699346406,
'f1': 0.6671181848967153,
'p': 0.5341648590021691,
'r': 0.8881875563570785},
{'a': 0.5968362548097478,
'f1': 0.3840627041149575,
'p': 0.2478920741989882,
'r': 0.8521739130434782},
{'a': 0.6576482830385015,
'f1': 0.6133960047003525,
'p': 0.47254073627036813,
'r': 0.8738839285714286},
{'a': 0.5907879710696612,
'f1': 0.448435094920472,
'p': 0.3051675977653631,
'r': 0.8452611218568665},
{'a': 0.5938552188552189,
'f1': 0.37459494491250805,
'p': 0.24163879598662208,
'r': 0.8328530259365994}],
'news.bbc.co.uk;2015': [{'a': 0.5130148741418764,
'f1': 0.21993127147766323,
'p': 0.12708498808578236,
'r': 0.8163265306122449},
{'a': 0.5078571428571429,
'f1': 0.18845700824499412,
'p': 0.10621348911311737,
'r': 0.8350730688935282},
{'a': 0.5344373353406097,
'f1': 0.3631371203020422,
'p': 0.22860847018150388,
'r': 0.8824020016680567},
{'a': 0.4969768470727031,
'f1': 0.12829031433682594,
'p': 0.07005302818866871,
'r': 0.7606060606060606},
{'a': 0.5387695182170026,
'f1': 0.34866189219751226,
'p': 0.21754468485418627,
'r': 0.8776091081593927},
{'a': 0.5138624028164882,
'f1': 0.17438963627304435,
'p': 0.09839752600506045,
'r': 0.7658643326039387},
{'a': 0.500921332388377,
'f1': 0.18664818664818664,
'p': 0.10507152145643693,
'r': 0.8347107438016529},
{'a': 0.511340206185567,
'f1': 0.23300970873786409,
'p': 0.1358148893360161,
'r': 0.8194233687405159},
{'a': 0.4971973943341918,
'f1': 0.10175913396481732,
'p': 0.0545876887340302,
'r': 0.749003984063745},
{'a': 0.5183381524882058,
'f1': 0.16291986247024595,
'p': 0.09077512525788388,
'r': 0.7938144329896907},
{'a': 0.46039070912167357,
'f1': 0.059012875536480686,
'p': 0.030752026838132513,
'r': 0.7284768211920529},
{'a': 0.4838709677419355,
'f1': 0.07127429805615551,
'p': 0.037628278221208664,
'r': 0.673469387755102},
{'a': 0.5200320512820513,
'f1': 0.27889245585874795,
'p': 0.16595033428844316,
'r': 0.8731155778894473},
{'a': 0.5030098370283365,
'f1': 0.1458491042139793,
'p': 0.08018867924528301,
'r': 0.8050139275766016},
{'a': 0.5032202662086732,
'f1': 0.17962656582368233,
'p': 0.1007423117709438,
'r': 0.8278867102396514},
{'a': 0.5006560723137483,
'f1': 0.13879808901181795,
'p': 0.07605400936897216,
'r': 0.7931034482758621},
{'a': 0.5010526315789474,
'f1': 0.23466092572658773,
'p': 0.13693467336683418,
'r': 0.8195488721804511},
{'a': 0.5222575516693164,
'f1': 0.29570312499999996,
'p': 0.17845355964167844,
'r': 0.8621867881548975},
{'a': 0.5255342769913961,
'f1': 0.281269707799033,
'p': 0.1680482290881688,
'r': 0.8621134020618557},
{'a': 0.49470865740150455,
'f1': 0.22431004110393424,
'p': 0.1305239179954442,
'r': 0.7969401947148818},
{'a': 0.5148642826367945,
'f1': 0.21733086190917517,
'p': 0.1252336448598131,
'r': 0.8213660245183888},
{'a': 0.5040306052739445,
'f1': 0.21052631578947367,
'p': 0.12096975756060985,
'r': 0.8107202680067002},
{'a': 0.5261428962496578,
'f1': 0.3028594442207008,
'p': 0.1829238628071029,
'r': 0.87953216374269},
{'a': 0.5136434327725152,
'f1': 0.22835352175863613,
'p': 0.13207057602490918,
'r': 0.8427152317880795},
{'a': 0.4740028958799526,
'f1': 0.1443254817987152,
'p': 0.07933145009416195,
'r': 0.7985781990521327}],
'news.yahoo.com;2000': [{'a': 0.8702185792349727,
'f1': 0.8854041013269,
'p': 0.8505214368482039,
'r': 0.9232704402515723},
{'a': 0.7757575757575758,
'f1': 0.7750759878419453,
'p': 0.6763925729442971,
'r': 0.9074733096085409},
{'a': 0.862708719851577,
'f1': 0.8537549407114624,
'p': 0.7854545454545454,
'r': 0.935064935064935},
{'a': 0.8400830737279336,
'f1': 0.8045685279187818,
'p': 0.7044444444444444,
'r': 0.9378698224852071},
{'a': 0.8602455146364495,
'f1': 0.8412017167381974,
'p': 0.7762376237623763,
'r': 0.9180327868852459},
{'a': 0.8524436090225563,
'f1': 0.8356020942408378,
'p': 0.7747572815533981,
'r': 0.9068181818181819},
{'a': 0.8425800631483987,
'f1': 0.8800275008594018,
'p': 0.839344262295082,
'r': 0.9248554913294798},
{'a': 0.7390519187358916,
'f1': 0.7830330330330332,
'p': 0.6647546207775653,
'r': 0.9525114155251142},
{'a': 0.7267637178051511,
'f1': 0.7474120082815735,
'p': 0.6202749140893471,
'r': 0.9401041666666666},
{'a': 0.706809229037704,
'f1': 0.7151448879168946,
'p': 0.5808170515097691,
'r': 0.930298719772404},
{'a': 0.8319634703196347,
'f1': 0.8087318087318086,
'p': 0.716390423572744,
'r': 0.9284009546539379},
{'a': 0.8673723536737236,
'f1': 0.8903757076685537,
'p': 0.8538993089832182,
'r': 0.9301075268817204},
{'a': 0.8293384467881112,
'f1': 0.7818627450980392,
'p': 0.6787234042553192,
'r': 0.9219653179190751},
{'a': 0.8467670504871567,
'f1': 0.8390697674418605,
'p': 0.760539629005059,
'r': 0.9356846473029046},
{'a': 0.8689390302448392,
'f1': 0.9029505865623889,
'p': 0.8461025982678214,
'r': 0.9679878048780488},
{'a': 0.738355376653249,
'f1': 0.7593865679534638,
'p': 0.6292725679228747,
'r': 0.9573333333333334},
{'a': 0.8487903225806451,
'f1': 0.7922437673130194,
'p': 0.7185929648241206,
'r': 0.8827160493827161},
{'a': 0.8242774566473988,
'f1': 0.7333333333333334,
'p': 0.616519174041298,
'r': 0.9047619047619048},
{'a': 0.8550863723608445,
'f1': 0.8364030335861321,
'p': 0.7524366471734892,
'r': 0.9414634146341463},
{'a': 0.6650246305418719,
'f1': 0.6358768406961178,
'p': 0.4917184265010352,
'r': 0.8996212121212122},
{'a': 0.6255506607929515,
'f1': 0.4248985115020298,
'p': 0.2808586762075134,
'r': 0.8722222222222222},
{'a': 0.8432343234323433,
'f1': 0.8288288288288289,
'p': 0.756578947368421,
'r': 0.9163346613545816},
{'a': 0.8220793140407289,
'f1': 0.7573099415204678,
'p': 0.6363636363636364,
'r': 0.9350180505415162},
{'a': 0.876311844077961,
'f1': 0.8894842598794374,
'p': 0.8248447204968944,
'r': 0.9651162790697675},
{'a': 0.9056603773584906,
'f1': 0.9162861491628614,
'p': 0.8788321167883212,
'r': 0.9570747217806042}],
'news.yahoo.com;2005': [{'a': 0.7106768350810295,
'f1': 0.7175430432759423,
'p': 0.5770958083832335,
'r': 0.948339483394834},
{'a': 0.6811902231668437,
'f1': 0.6415770609318997,
'p': 0.504225352112676,
'r': 0.8817733990147784},
{'a': 0.7314553990610329,
'f1': 0.7428057553956835,
'p': 0.64937106918239,
'r': 0.8676470588235294},
{'a': 0.7258064516129032,
'f1': 0.6480648064806479,
'p': 0.502092050209205,
'r': 0.9137055837563451},
{'a': 0.6931089743589743,
'f1': 0.4980340760157274,
'p': 0.3467153284671533,
'r': 0.8837209302325582},
{'a': 0.7647058823529411,
'f1': 0.7186440677966102,
'p': 0.5816186556927297,
'r': 0.9401330376940134},
{'a': 0.6643445218492717,
'f1': 0.5575959933222037,
'p': 0.39809296781883197,
'r': 0.9303621169916435},
{'a': 0.703955500618047,
'f1': 0.6531498913830558,
'p': 0.5050391937290034,
'r': 0.9241803278688525},
{'a': 0.8939988782950085,
'f1': 0.8486789431545236,
'p': 0.8217054263565892,
'r': 0.8774834437086093},
{'a': 0.7451654490760636,
'f1': 0.7778194080179844,
'p': 0.6607256524506684,
'r': 0.9453551912568307},
{'a': 0.6557719054242003,
'f1': 0.49021627188465505,
'p': 0.34,
'r': 0.8782287822878229},
{'a': 0.6959753909254037,
'f1': 0.7118561710398447,
'p': 0.5760912308297287,
'r': 0.9313413858868405},
{'a': 0.8802267895109851,
'f1': 0.8822299651567944,
'p': 0.8178294573643411,
'r': 0.9576399394856279},
{'a': 0.8166607333570666,
'f1': 0.8590971272229823,
'p': 0.7853926963481741,
'r': 0.9480676328502415},
{'a': 0.8278716798896171,
'f1': 0.8722805221397492,
'p': 0.7921896792189679,
'r': 0.9703872437357631},
{'a': 0.7393989296006587,
'f1': 0.7712323816407662,
'p': 0.6541998773758431,
'r': 0.9392605633802817},
{'a': 0.6601195559350982,
'f1': 0.3800623052959502,
'p': 0.2435129740518962,
'r': 0.8652482269503546},
{'a': 0.7120786516853933,
'f1': 0.6182495344506518,
'p': 0.47293447293447294,
'r': 0.8924731182795699},
{'a': 0.8115535614133483,
'f1': 0.8250000000000001,
'p': 0.7219690063810392,
'r': 0.9623329283110571},
{'a': 0.802187351402758,
'f1': 0.8283828382838283,
'p': 0.7328467153284671,
'r': 0.952561669829222},
{'a': 0.6380290587492103,
'f1': 0.4897595725734639,
'p': 0.33577533577533575,
'r': 0.9046052631578947},
{'a': 0.7056530214424951,
'f1': 0.6986027944111777,
'p': 0.5586592178770949,
'r': 0.9320905459387483},
{'a': 0.785377358490566,
'f1': 0.8115942028985507,
'p': 0.7040229885057471,
'r': 0.9579667644183774},
{'a': 0.650200267022697,
'f1': 0.4902723735408561,
'p': 0.336,
'r': 0.9064748201438849},
{'a': 0.7740604949587534,
'f1': 0.7960281340504758,
'p': 0.6832386363636364,
'r': 0.9534192269573836}],
'news.yahoo.com;2010': [{'a': 0.6546373150487189,
'f1': 0.6197854588796187,
'p': 0.47823421213979156,
'r': 0.8803611738148984},
{'a': 0.6002914035939777,
'f1': 0.3215169002473207,
'p': 0.2070063694267516,
'r': 0.7195571955719557},
{'a': 0.6126561870213624,
'f1': 0.5104431991849211,
'p': 0.3646288209606987,
'r': 0.8505942275042445},
{'a': 0.6347082868821999,
'f1': 0.5768402927249248,
'p': 0.4442970822281167,
'r': 0.8220858895705522},
{'a': 0.6800804828973843,
'f1': 0.6723901098901098,
'p': 0.5438888888888889,
'r': 0.8803956834532374},
{'a': 0.623989218328841,
'f1': 0.4416277518345564,
'p': 0.3087686567164179,
'r': 0.775175644028103},
{'a': 0.6557734204793029,
'f1': 0.6241078509119746,
'p': 0.47900182592818014,
'r': 0.8953356086461889},
{'a': 0.6014291385470425,
'f1': 0.48512820512820515,
'p': 0.3432510885341074,
'r': 0.8269230769230769},
{'a': 0.6791044776119403,
'f1': 0.6557377049180328,
'p': 0.5266380894060012,
'r': 0.8686868686868687},
{'a': 0.5817867932671558,
'f1': 0.39323731997495304,
'p': 0.259289843104872,
'r': 0.8134715025906736},
{'a': 0.5619448340345956,
'f1': 0.26394344069128045,
'p': 0.1590909090909091,
'r': 0.7741935483870968},
{'a': 0.5911764705882353,
'f1': 0.3175122749590835,
'p': 0.1975560081466395,
'r': 0.8083333333333333},
{'a': 0.7223495702005731,
'f1': 0.7505791505791506,
'p': 0.6462765957446809,
'r': 0.8950276243093923},
{'a': 0.5749496981891348,
'f1': 0.22548120989917506,
'p': 0.13457330415754923,
'r': 0.6949152542372882},
{'a': 0.6560595802301964,
'f1': 0.6386913229018492,
'p': 0.4977827050997783,
'r': 0.8908730158730159},
{'a': 0.6480129764801298,
'f1': 0.5633802816901409,
'p': 0.41697691734921816,
'r': 0.8682170542635659},
{'a': 0.6401622718052739,
'f1': 0.5259219668626404,
'p': 0.3904761904761905,
'r': 0.8052373158756138},
{'a': 0.6205170975813178,
'f1': 0.4811858608893957,
'p': 0.3414239482200647,
'r': 0.8146718146718147},
{'a': 0.6233040702314445,
'f1': 0.5237134207870837,
'p': 0.3802197802197802,
'r': 0.8411669367909238},
{'a': 0.643979057591623,
'f1': 0.5584415584415584,
'p': 0.41346153846153844,
'r': 0.86},
{'a': 0.615355664283026,
'f1': 0.5358764759309719,
'p': 0.38995373430270985,
'r': 0.8563134978229318},
{'a': 0.6680628272251309,
'f1': 0.6507528461255968,
'p': 0.5148169668797211,
'r': 0.8842315369261478},
{'a': 0.656319290465632,
'f1': 0.6112040133779264,
'p': 0.47405966277561606,
'r': 0.86},
{'a': 0.651138353765324,
'f1': 0.6212927756653993,
'p': 0.48747016706443913,
'r': 0.8563941299790356},
{'a': 0.5707905314872711,
'f1': 0.3237156931738212,
'p': 0.20646319569120286,
'r': 0.749185667752443}],
'news.yahoo.com;2015': [{'a': 0.29858053223203784,
'f1': 0.03477606322920587,
'p': 0.017834394904458598,
'r': 0.6947368421052632},
{'a': 0.33947522798784063,
'f1': 0.024802173142789655,
'p': 0.012626768749749469,
'r': 0.6938325991189427},
{'a': 0.2532579608252973,
'f1': 0.01872698141867288,
'p': 0.009489666807254323,
'r': 0.704225352112676},
{'a': 0.33853195035196376,
'f1': 0.04742629938747,
'p': 0.024509440947797112,
'r': 0.7298787210584344},
{'a': 0.3760262213204308,
'f1': 0.02592466254081185,
'p': 0.013197062909307403,
'r': 0.7287671232876712},
{'a': 0.3250149471002626,
'f1': 0.0655006118189016,
'p': 0.034222105223571886,
'r': 0.7615062761506276},
{'a': 0.34424390487111733,
'f1': 0.05237940848013256,
'p': 0.02716538375308273,
'r': 0.7291875626880642},
{'a': 0.344436902835065,
'f1': 0.05565583634175692,
'p': 0.02882967118591242,
'r': 0.8008658008658008},
{'a': 0.2595164609053498,
'f1': 0.03406422072667706,
'p': 0.017414642021147793,
'r': 0.7753818508535489},
{'a': 0.3805371029184901,
'f1': 0.028057696107488643,
'p': 0.014302981466559226,
'r': 0.7319587628865979},
{'a': 0.284335304569201,
'f1': 0.008596304323867602,
'p': 0.004324683965402528,
'r': 0.7005988023952096},
{'a': 0.3359246634131986,
'f1': 0.036025773379374175,
'p': 0.018496856265535898,
'r': 0.6884353741496598},
{'a': 0.35797694874127994,
'f1': 0.047474406569917874,
'p': 0.024540590835077924,
'r': 0.7250859106529209},
{'a': 0.32703541918846374,
'f1': 0.04288922155688623,
'p': 0.022122697965329525,
'r': 0.6996336996336996},
{'a': 0.24742687678083386,
'f1': 0.029216517271124783,
'p': 0.014906808526924107,
'r': 0.7294007490636704},
{'a': 0.330388346800123,
'f1': 0.02260855370551747,
'p': 0.011495206872224759,
'r': 0.6805896805896806},
{'a': 0.3364262196947022,
'f1': 0.03363603341808936,
'p': 0.017244590115088086,
'r': 0.6798825256975036},
{'a': 0.2544672243004158,
'f1': 0.013301319507893672,
'p': 0.006721931105575122,
'r': 0.627254509018036},
{'a': 0.3261147454695842,
'f1': 0.033642441233556176,
'p': 0.017268098295328757,
'r': 0.65},
{'a': 0.3437333806062755,
'f1': 0.04171289105835368,
'p': 0.02146037060994635,
'r': 0.7411300919842313},
{'a': 0.32607265735681035,
'f1': 0.018423670732156675,
'p': 0.009353486979946124,
'r': 0.6082725060827251},
{'a': 0.26626690067602704,
'f1': 0.02475322375099899,
'p': 0.012586767419383183,
'r': 0.741267787839586},
{'a': 0.33969465648854963,
'f1': 0.0693295676148208,
'p': 0.03635273791474208,
'r': 0.7465604634322954},
{'a': 0.34300164667137145,
'f1': 0.06369640282946126,
'p': 0.03327379076039368,
'r': 0.7433489827856025},
{'a': 0.2550602221179415,
'f1': 0.010924195223260644,
'p': 0.005513048946651294,
'r': 0.5910112359550562}],
'thenation.com;2000': [{'a': 0.9431506849315069,
'f1': 0.9591334318069915,
'p': 0.9662698412698413,
'r': 0.9521016617790812},
{'a': 0.90315142198309,
'f1': 0.9238210399032648,
'p': 0.8883720930232558,
'r': 0.9622166246851386},
{'a': 0.8384937238493724,
'f1': 0.8602461984069515,
'p': 0.7867549668874172,
'r': 0.9488817891373802},
{'a': 0.9204381666186221,
'f1': 0.9527720739219714,
'p': 0.93048128342246,
'r': 0.9761570827489481},
{'a': 0.7707959305804907,
'f1': 0.8168340506934482,
'p': 0.7231160033869602,
'r': 0.9384615384615385},
{'a': 0.8483072916666666,
'f1': 0.8849382716049383,
'p': 0.8212648945921174,
'r': 0.9593147751605996},
{'a': 0.9346846846846847,
'f1': 0.9621903520208606,
'p': 0.9418604651162791,
'r': 0.9834172342315665},
{'a': 0.9323270440251572,
'f1': 0.9608043129826607,
'p': 0.9385140905209223,
'r': 0.9841791044776119},
{'a': 0.8972736124634859,
'f1': 0.9314712569015914,
'p': 0.8945726762320648,
'r': 0.9715447154471545},
{'a': 0.910699919549477,
'f1': 0.9271175311884439,
'p': 0.9039692701664532,
'r': 0.9514824797843666},
{'a': 0.9109347442680776,
'f1': 0.9427762039660057,
'p': 0.908793009284544,
'r': 0.9793996468510888},
{'a': 0.8136932192231732,
'f1': 0.8526808953669963,
'p': 0.7697368421052632,
'r': 0.9556592765460911},
{'a': 0.9138576779026217,
'f1': 0.8535031847133757,
'p': 0.8553191489361702,
'r': 0.8516949152542372},
{'a': 0.9180722891566265,
'f1': 0.8571428571428571,
'p': 0.8571428571428571,
'r': 0.8571428571428571},
{'a': 0.9352564102564103,
'f1': 0.9537757437070938,
'p': 0.9353680430879713,
'r': 0.972922502334267},
{'a': 0.8493333333333334,
'f1': 0.8846938775510205,
'p': 0.8171536286522149,
'r': 0.96440489432703},
{'a': 0.927860696517413,
'f1': 0.9488986784140968,
'p': 0.9316608996539792,
'r': 0.966786355475763},
{'a': 0.946524064171123,
'f1': 0.9664804469273743,
'p': 0.9751972942502819,
'r': 0.9579180509413068},
{'a': 0.868421052631579,
'f1': 0.9014778325123153,
'p': 0.8464384828862165,
'r': 0.964172813487882},
{'a': 0.9170692431561996,
'f1': 0.9324590163934426,
'p': 0.9080459770114943,
'r': 0.9582210242587601},
{'a': 0.9505260801432729,
'f1': 0.9717355160506459,
'p': 0.9627470856563609,
'r': 0.9808933643170669},
{'a': 0.9528054535920294,
'f1': 0.9743918053777207,
'p': 0.9706632653061225,
'r': 0.9781491002570694},
{'a': 0.9290882778581766,
'f1': 0.9545313949891742,
'p': 0.9362864077669902,
'r': 0.9735015772870662},
{'a': 0.8619839802834257,
'f1': 0.8987341772151899,
'p': 0.8473998294970162,
'r': 0.9566891241578441},
{'a': 0.8974358974358975,
'f1': 0.9275237273511648,
'p': 0.8921161825726142,
'r': 0.9658580413297394}],
'thenation.com;2005': [{'a': 0.7848152659358506,
'f1': 0.8589675359233634,
'p': 0.77447216890595,
'r': 0.96415770609319},
{'a': 0.6407079646017699,
'f1': 0.6896024464831804,
'p': 0.5381861575178998,
'r': 0.9595744680851064},
{'a': 0.823003194888179,
'f1': 0.8837599664288712,
'p': 0.8118735543562067,
'r': 0.9696132596685083},
{'a': 0.8587055606198724,
'f1': 0.9137931034482758,
'p': 0.8665611814345991,
'r': 0.9664705882352941},
{'a': 0.6699834162520729,
'f1': 0.7224546722454672,
'p': 0.5974625144175317,
'r': 0.9135802469135802},
{'a': 0.8814229249011858,
'f1': 0.9029754204398448,
'p': 0.8660049627791563,
'r': 0.9432432432432433},
{'a': 0.7600254614894971,
'f1': 0.8311688311688312,
'p': 0.7238689547581904,
'r': 0.9758149316508938},
{'a': 0.8488063660477454,
'f1': 0.8747252747252748,
'p': 0.8300312825860271,
'r': 0.924506387921022},
{'a': 0.7800776196636481,
'f1': 0.8455949137148048,
'p': 0.7508064516129033,
'r': 0.9677754677754677},
{'a': 0.8517776664997496,
'f1': 0.8779884583676834,
'p': 0.8255813953488372,
'r': 0.9375},
{'a': 0.814151747655584,
'f1': 0.8736964078794901,
'p': 0.799575821845175,
'r': 0.9629629629629629},
{'a': 0.7155705452501405,
'f1': 0.7974379503602883,
'p': 0.6883206634416034,
'r': 0.9476688867745005},
{'a': 0.87,
'f1': 0.8974678289746783,
'p': 0.8505114083398898,
'r': 0.9499121265377856},
{'a': 0.7546628407460545,
'f1': 0.832517140058766,
'p': 0.7296137339055794,
'r': 0.9692132269099202},
{'a': 0.8380462724935732,
'f1': 0.8968058968058968,
'p': 0.8346036585365854,
'r': 0.9690265486725663},
{'a': 0.9326878745965883,
'f1': 0.9617400419287212,
'p': 0.9390992835209826,
'r': 0.985499462943072},
{'a': 0.8768656716417911,
'f1': 0.8896321070234113,
'p': 0.8375314861460957,
'r': 0.948644793152639},
{'a': 0.8896797153024911,
'f1': 0.916591928251121,
'p': 0.875,
'r': 0.9623352165725048},
{'a': 0.8037313432835821,
'f1': 0.8221771467207573,
'p': 0.7469287469287469,
'r': 0.9142857142857143},
{'a': 0.8593231585932316,
'f1': 0.8780207134637514,
'p': 0.8177920685959271,
'r': 0.9478260869565217},
{'a': 0.83729216152019,
'f1': 0.8651574803149606,
'p': 0.7918918918918919,
'r': 0.9533622559652929},
{'a': 0.8729314420803782,
'f1': 0.8983451536643026,
'p': 0.8467023172905526,
'r': 0.9566968781470292},
{'a': 0.8260038240917782,
'f1': 0.844621513944223,
'p': 0.7641606591143152,
'r': 0.9440203562340967},
{'a': 0.8518918918918919,
'f1': 0.879824561403509,
'p': 0.8187755102040817,
'r': 0.9507109004739337},
{'a': 0.7968515742128935,
'f1': 0.8610968733982574,
'p': 0.7720588235294118,
'r': 0.9733487833140209}],
'thenation.com;2010': [{'a': 0.6410326833287558,
'f1': 0.605016621335751,
'p': 0.455,
'r': 0.9026149684400361},
{'a': 0.6470588235294118,
'f1': 0.6098003629764065,
'p': 0.4592255125284738,
'r': 0.9072907290729073},
{'a': 0.6072772898368883,
'f1': 0.5725211690794865,
'p': 0.4190323870451819,
'r': 0.903448275862069},
{'a': 0.66090961427749,
'f1': 0.6127547666009204,
'p': 0.4641434262948207,
'r': 0.9013539651837524},
{'a': 0.6890087313816128,
'f1': 0.688768953996402,
'p': 0.5471621069824418,
'r': 0.9292649098474342},
{'a': 0.7513644633110976,
'f1': 0.3880597014925373,
'p': 0.2559055118110236,
'r': 0.8024691358024691},
{'a': 0.6087233365170328,
'f1': 0.4630843162953255,
'p': 0.3183183183183183,
'r': 0.8493589743589743},
{'a': 0.4284744612251824,
'f1': 0.40662438336856943,
'p': 0.2615001133016089,
'r': 0.9136975455265242},
{'a': 0.8423518388381354,
'f1': 0.8713930823619339,
'p': 0.8142857142857143,
'r': 0.93711467324291},
{'a': 0.758485639686684,
'f1': 0.7215951843491346,
'p': 0.6199095022624435,
'r': 0.8631863186318632},
{'a': 0.6766757658477404,
'f1': 0.6051851851851853,
'p': 0.4652619589977221,
'r': 0.8654661016949152},
{'a': 0.6650197628458498,
'f1': 0.6643564356435643,
'p': 0.5205585725368502,
'r': 0.9179206566347469},
{'a': 0.6085968175242819,
'f1': 0.624950495049505,
'p': 0.4679715302491103,
'r': 0.9404052443384983},
{'a': 0.6065616119865668,
'f1': 0.5591895803183793,
'p': 0.4052013422818792,
'r': 0.9019607843137255},
{'a': 0.6717156105100464,
'f1': 0.6101321585903083,
'p': 0.4611542730299667,
'r': 0.9013015184381779},
{'a': 0.5491822706802559,
'f1': 0.48788368336025856,
'p': 0.3348115299334812,
'r': 0.8988095238095238},
{'a': 0.726970402394413,
'f1': 0.6717313074770092,
'p': 0.5374280230326296,
'r': 0.8955223880597015},
{'a': 0.7198697068403909,
'f1': 0.7012002526847757,
'p': 0.5677749360613811,
'r': 0.916597853014038},
{'a': 0.3685328800594922,
'f1': 0.4045281506712082,
'p': 0.259111909650924,
'r': 0.9219178082191781},
{'a': 0.5321202095395644,
'f1': 0.40850470547229,
'p': 0.26745778183477864,
'r': 0.8643067846607669},
{'a': 0.4849435382685069,
'f1': 0.4372858122001371,
'p': 0.28868778280542984,
'r': 0.9011299435028248},
{'a': 0.756413952147593,
'f1': 0.7538595980192252,
'p': 0.6330724070450098,
'r': 0.931605471562275},
{'a': 0.5558358174730567,
'f1': 0.5285957653930397,
'p': 0.3719178082191781,
'r': 0.913372582001682},
{'a': 0.7757724911129341,
'f1': 0.7651775486827033,
'p': 0.6549019607843137,
'r': 0.9201101928374655},
{'a': 0.6724590163934426,
'f1': 0.5914110429447852,
'p': 0.44112263575350824,
'r': 0.8970223325062034}],
'thenation.com;2015': [{'a': 0.6476377952755905,
'f1': 0.716547901821061,
'p': 0.5780900670712232,
'r': 0.9422175950026028},
{'a': 0.5456389452332657,
'f1': 0.5290819901892082,
'p': 0.3743182944967774,
'r': 0.9020310633213859},
{'a': 0.5882171499074645,
'f1': 0.6124818577648766,
'p': 0.45770065075921906,
'r': 0.9254385964912281},
{'a': 0.7331489802972693,
'f1': 0.8134815172747041,
'p': 0.7154696132596685,
'r': 0.9426091825307951},
{'a': 0.616156282998944,
'f1': 0.6692447679708825,
'p': 0.5188712522045855,
'r': 0.9423446508648302},
{'a': 0.5764468758997985,
'f1': 0.6134034165571616,
'p': 0.4694288012872084,
'r': 0.8847611827141774},
{'a': 0.5805320081847413,
'f1': 0.6194643330681516,
'p': 0.463676061929337,
'r': 0.9329073482428115},
{'a': 0.4491803278688525,
'f1': 0.2851063829787234,
'p': 0.17290322580645162,
'r': 0.8121212121212121},
{'a': 0.5272599366865987,
'f1': 0.5044247787610618,
'p': 0.35312338668043364,
'r': 0.8825806451612903},
{'a': 0.5849114233353696,
'f1': 0.6190075693860387,
'p': 0.4634760705289673,
'r': 0.9316455696202531},
{'a': 0.41858367162832655,
'f1': 0.1377926421404682,
'p': 0.07612712490761271,
'r': 0.7253521126760564},
{'a': 0.5618803965462105,
'f1': 0.5648030495552732,
'p': 0.4098662978331028,
'r': 0.9080694586312564},
{'a': 0.5824014125956445,
'f1': 0.617829248586049,
'p': 0.46663954434499594,
'r': 0.9139442231075697},
{'a': 0.583029197080292,
'f1': 0.6156433978132885,
'p': 0.4594142259414226,
'r': 0.9328802039082413},
{'a': 0.5821352095442032,
'f1': 0.6165075800112296,
'p': 0.4625105307497894,
'r': 0.9242424242424242},
{'a': 0.4474337748344371,
'f1': 0.27485062466051063,
'p': 0.16546762589928057,
'r': 0.8108974358974359},
{'a': 0.6857142857142857,
'f1': 0.7595021863437607,
'p': 0.6358772176851591,
'r': 0.9427974947807933},
{'a': 0.7482485531526043,
'f1': 0.8306872887432142,
'p': 0.7277458722182341,
'r': 0.9675495108565975},
{'a': 0.6837748344370861,
'f1': 0.7597484276729559,
'p': 0.6456440406199893,
'r': 0.9228418640183346},
{'a': 0.7082872928176795,
'f1': 0.7900318133616119,
'p': 0.6710200405314118,
'r': 0.9603609410248147},
{'a': 0.580554689934559,
'f1': 0.5982089552238806,
'p': 0.445729537366548,
'r': 0.9092558983666061},
{'a': 0.5946643717728055,
'f1': 0.6409148665819567,
'p': 0.5009932459276917,
'r': 0.889280677009873},
{'a': 0.7188273367599467,
'f1': 0.7975880498835137,
'p': 0.683419445749178,
'r': 0.9575518262586377},
{'a': 0.43523767989533363,
'f1': 0.2127659574468085,
'p': 0.12315270935960591,
'r': 0.78125},
{'a': 0.5316718587746625,
'f1': 0.5169582292038558,
'p': 0.36163836163836166,
'r': 0.9061326658322904}],
'www.cnn.com;2000': [{'a': 0.7115885416666666,
'f1': 0.64813343923749,
'p': 0.5106382978723404,
'r': 0.8869565217391304},
{'a': 0.6905885274397815,
'f1': 0.7581521739130436,
'p': 0.637402088772846,
'r': 0.9353448275862069},
{'a': 0.954779622209502,
'f1': 0.9730651210364815,
'p': 0.9844774060020697,
'r': 0.9619143916413886},
{'a': 0.6161309884778654,
'f1': 0.5208175624526873,
'p': 0.3659574468085106,
'r': 0.9028871391076115},
{'a': 0.7039295392953929,
'f1': 0.5857819905213271,
'p': 0.43338008415147267,
'r': 0.9035087719298246},
{'a': 0.7941073307611365,
'f1': 0.8425851434700993,
'p': 0.7470280551592963,
'r': 0.9661746617466175},
{'a': 0.7667009249743063,
'f1': 0.7878504672897196,
'p': 0.6717131474103586,
'r': 0.9525423728813559},
{'a': 0.7385456073980664,
'f1': 0.7680835197613722,
'p': 0.6523115896136795,
'r': 0.9338168631006346},
{'a': 0.7407594936708861,
'f1': 0.7455268389662026,
'p': 0.625,
'r': 0.9236453201970444},
{'a': 0.7487775061124694,
'f1': 0.7363694676074407,
'p': 0.6106382978723405,
'r': 0.9273021001615509},
{'a': 0.6857402361489555,
'f1': 0.7030042918454936,
'p': 0.5605749486652978,
'r': 0.9424626006904487},
{'a': 0.7878956135480288,
'f1': 0.792391304347826,
'p': 0.6813084112149532,
'r': 0.9467532467532468},
{'a': 0.6663157894736842,
'f1': 0.6504961411245865,
'p': 0.49538203190596136,
'r': 0.9470304975922953},
{'a': 0.7472024415055951,
'f1': 0.7550517496303598,
'p': 0.6372712146422629,
'r': 0.9262394195888755},
{'a': 0.813499804916114,
'f1': 0.8530135301353012,
'p': 0.7739955357142857,
'r': 0.95},
{'a': 0.6285881663737551,
'f1': 0.5560224089635855,
'p': 0.39502487562189054,
'r': 0.9385342789598109},
{'a': 0.7454438565549677,
'f1': 0.7380520266182699,
'p': 0.615539858728557,
'r': 0.9214501510574018},
{'a': 0.5698852126941256,
'f1': 0.3546099290780142,
'p': 0.22095959595959597,
'r': 0.8974358974358975},
{'a': 0.7482403898213319,
'f1': 0.7406581148912438,
'p': 0.6153846153846154,
'r': 0.9299719887955182},
{'a': 0.7352941176470589,
'f1': 0.6995645863570392,
'p': 0.5604651162790698,
'r': 0.9305019305019305},
{'a': 0.7090414683888511,
'f1': 0.5884615384615385,
'p': 0.4340425531914894,
'r': 0.9134328358208955},
{'a': 0.7859608745684695,
'f1': 0.8243626062322947,
'p': 0.7487135506003431,
'r': 0.917016806722689},
{'a': 0.8079650845608293,
'f1': 0.8574898785425101,
'p': 0.7690631808278867,
'r': 0.9688929551692589},
{'a': 0.7228381374722838,
'f1': 0.7920133111480865,
'p': 0.6784922394678492,
'r': 0.9511545293072824},
{'a': 0.6891631315079059,
'f1': 0.7028023598820058,
'p': 0.5669244497323022,
'r': 0.9243452958292919}],
'www.cnn.com;2005': [{'a': 0.7604639435199193,
'f1': 0.6291959406713504,
'p': 0.49266503667481665,
'r': 0.8704103671706264},
{'a': 0.735812133072407,
'f1': 0.6148359486447932,
'p': 0.47001090512540894,
'r': 0.8886597938144329},
{'a': 0.7785204991087344,
'f1': 0.762087123025371,
'p': 0.6461038961038961,
'r': 0.9288214702450408},
{'a': 0.7536363636363637,
'f1': 0.681551116333725,
'p': 0.558766859344894,
'r': 0.8734939759036144},
{'a': 0.7412398921832885,
'f1': 0.5580110497237569,
'p': 0.4139344262295082,
'r': 0.8559322033898306},
{'a': 0.7816185441236275,
'f1': 0.7494167055529631,
'p': 0.630298273155416,
'r': 0.9240506329113924},
{'a': 0.734946539110861,
'f1': 0.5936151855047456,
'p': 0.44272844272844275,
'r': 0.900523560209424},
{'a': 0.7412398921832885,
'f1': 0.5683453237410072,
'p': 0.4253028263795424,
'r': 0.8563685636856369},
{'a': 0.781571369806664,
'f1': 0.7799419809365934,
'p': 0.676491732566499,
'r': 0.9207436399217221},
{'a': 0.7792062604807155,
'f1': 0.5129469790382244,
'p': 0.37142857142857144,
'r': 0.8286852589641435},
{'a': 0.7954345917471466,
'f1': 0.7620020429009193,
'p': 0.6481320590790617,
'r': 0.9244114002478315},
{'a': 0.746031746031746,
'f1': 0.6,
'p': 0.456441717791411,
'r': 0.8752941176470588},
{'a': 0.7453857075248462,
'f1': 0.6515544041450778,
'p': 0.513265306122449,
'r': 0.8918439716312057},
{'a': 0.8256265442993294,
'f1': 0.805205047318612,
'p': 0.7230878186968839,
'r': 0.9083629893238434},
{'a': 0.7852965747702589,
'f1': 0.7640036730945823,
'p': 0.6561514195583596,
'r': 0.9142857142857143},
{'a': 0.7494908350305499,
'f1': 0.6138147566718996,
'p': 0.4768292682926829,
'r': 0.8612334801762115},
{'a': 0.8243348982785602,
'f1': 0.7677185721676152,
'p': 0.6666666666666666,
'r': 0.9048780487804878},
{'a': 0.7411912751677853,
'f1': 0.6824498198661864,
'p': 0.5590219224283305,
'r': 0.8758256274768824},
{'a': 0.7430267062314541,
'f1': 0.5848513902205178,
'p': 0.4407514450867052,
'r': 0.8689458689458689},
{'a': 0.735144312393888,
'f1': 0.4935064935064934,
'p': 0.3475609756097561,
'r': 0.8507462686567164},
{'a': 0.7769423558897243,
'f1': 0.7415295256534365,
'p': 0.6283839212469237,
'r': 0.9043683589138135},
{'a': 0.7738809413936317,
'f1': 0.7461139896373057,
'p': 0.6260869565217392,
'r': 0.9230769230769231},
{'a': 0.7795648060548723,
'f1': 0.7039390088945362,
'p': 0.5729058945191313,
'r': 0.9126853377265239},
{'a': 0.7626004382761139,
'f1': 0.603174603174603,
'p': 0.4678030303030303,
'r': 0.8487972508591065},
{'a': 0.7890625,
'f1': 0.8150684931506849,
'p': 0.719758064516129,
'r': 0.9394736842105263}],
'www.cnn.com;2010': [{'a': 0.6355511420059583,
'f1': 0.6728083209509659,
'p': 0.5226223453370268,
'r': 0.9441201000834029},
{'a': 0.5179190751445086,
'f1': 0.47326315789473683,
'p': 0.31986340352874215,
'r': 0.9093851132686084},
{'a': 0.6284875183553598,
'f1': 0.6507936507936507,
'p': 0.4994703389830508,
'r': 0.9336633663366337},
{'a': 0.6361153262518968,
'f1': 0.6920113023375288,
'p': 0.5435835351089588,
'r': 0.9519434628975265},
{'a': 0.47166841552990557,
'f1': 0.2138953942232631,
'p': 0.12286995515695068,
'r': 0.8253012048192772},
{'a': 0.5136150234741784,
'f1': 0.3884297520661157,
'p': 0.24736842105263157,
'r': 0.9038461538461539},
{'a': 0.4337180832458902,
'f1': 0.3832380952380952,
'p': 0.24276061776061775,
'r': 0.9095840867992767},
{'a': 0.5791583166332666,
'f1': 0.5635910224438903,
'p': 0.4033313503866746,
'r': 0.9351724137931035},
{'a': 0.5577639751552795,
'f1': 0.5672609400324149,
'p': 0.4117647058823529,
'r': 0.9114583333333334},
{'a': 0.6279300099042588,
'f1': 0.6878980891719745,
'p': 0.5440210249671484,
'r': 0.9352409638554217},
{'a': 0.46415770609318996,
'f1': 0.3118527042577675,
'p': 0.1892458100558659,
'r': 0.8856209150326797},
{'a': 0.489742923915866,
'f1': 0.5282112845138055,
'p': 0.36813922356091033,
'r': 0.9345794392523364},
{'a': 0.5102420856610801,
'f1': 0.5247560534875316,
'p': 0.36890243902439024,
'r': 0.9086357947434293},
{'a': 0.5880765456329735,
'f1': 0.6600526422352703,
'p': 0.5027760641579272,
'r': 0.9605185621685327},
{'a': 0.2570775676018789,
'f1': 0.11171827565270188,
'p': 0.05998370008149959,
'r': 0.8123620309050773},
{'a': 0.4733704292527822,
'f1': 0.39359267734553777,
'p': 0.2508751458576429,
'r': 0.9129511677282378},
{'a': 0.5960061443932412,
'f1': 0.6676775334849634,
'p': 0.516015625,
'r': 0.945597709377237},
{'a': 0.5098305084745762,
'f1': 0.49930747922437674,
'p': 0.3433333333333333,
'r': 0.9149746192893401},
{'a': 0.6769271948608137,
'f1': 0.7574844283705043,
'p': 0.6260378611756892,
'r': 0.9587995930824008},
{'a': 0.4728294815740162,
'f1': 0.39222275564090253,
'p': 0.25045984058859594,
'r': 0.9037610619469026},
{'a': 0.12174540209159755,
'f1': 0.0698189595905584,
'p': 0.036425952494819065,
'r': 0.8385321100917431},
{'a': 0.6018776077885952,
'f1': 0.65123362777947,
'p': 0.5016424213984045,
'r': 0.9279513888888888},
{'a': 0.5276254744833404,
'f1': 0.4706994328922496,
'p': 0.3178047223994895,
'r': 0.907103825136612},
{'a': 0.5953020134228187,
'f1': 0.6228893058161351,
'p': 0.4676056338028169,
'r': 0.9325842696629213},
{'a': 0.5067637877211238,
'f1': 0.4906876790830946,
'p': 0.3374384236453202,
'r': 0.8989501312335958}],
'www.cnn.com;2015': [{'a': 0.5170128148475475,
'f1': 0.61418990469467,
'p': 0.45194805194805193,
'r': 0.9581497797356828},
{'a': 0.2853522554485555,
'f1': 0.34357541899441335,
'p': 0.2137891077636153,
'r': 0.8744075829383886},
{'a': 0.3316355404640634,
'f1': 0.3208740655549166,
'p': 0.1957894736842105,
'r': 0.8885350318471338},
{'a': 0.19763263580638343,
'f1': 0.29152668906308327,
'p': 0.17149758454106281,
'r': 0.9713930348258707},
{'a': 0.2777526053466244,
'f1': 0.34349258649093906,
'p': 0.20996978851963746,
'r': 0.9434389140271493},
{'a': 0.4316002019182231,
'f1': 0.4914182475158084,
'p': 0.3364254792826221,
'r': 0.9112227805695142},
{'a': 0.399438202247191,
'f1': 0.4340921122286925,
'p': 0.2839335180055402,
'r': 0.9213483146067416},
{'a': 0.48381294964028776,
'f1': 0.5713218820014937,
'p': 0.40865384615384615,
'r': 0.9491315136476427},
{'a': 0.25021533161068044,
'f1': 0.2805785123966943,
'p': 0.16585246702491452,
'r': 0.9101876675603218},
{'a': 0.06572411157814291,
'f1': 0.07421431276031806,
'p': 0.03865877712031558,
'r': 0.9245283018867925},
{'a': 0.26131591227251516,
'f1': 0.3255219429058372,
'p': 0.1974160206718346,
'r': 0.9271844660194175},
{'a': 0.22401433691756273,
'f1': 0.21129326047358837,
'p': 0.1197110423116615,
'r': 0.8992248062015504},
{'a': 0.5113829425756031,
'f1': 0.5426208651399491,
'p': 0.3835431654676259,
'r': 0.9271739130434783},
{'a': 0.6100427350427351,
'f1': 0.7197338111082672,
'p': 0.5727087576374745,
'r': 0.9683195592286501},
{'a': 0.4633901705115346,
'f1': 0.5931558935361216,
'p': 0.42731921110299487,
'r': 0.9693454846727423},
{'a': 0.47280744732974034,
'f1': 0.5531561461794021,
'p': 0.3892460549386324,
'r': 0.9555236728837877},
{'a': 0.15528531337698784,
'f1': 0.22188711762171479,
'p': 0.12533463129715258,
'r': 0.9662288930581614},
{'a': 0.35905224787363305,
'f1': 0.35236341313689384,
'p': 0.21891685736079328,
'r': 0.9025157232704403},
{'a': 0.5113168724279835,
'f1': 0.5401742497579864,
'p': 0.3828911253430924,
'r': 0.9167579408543264},
{'a': 0.25434439178515006,
'f1': 0.36359550561797754,
'p': 0.22378976486860305,
'r': 0.9688622754491018},
{'a': 0.23699648025029332,
'f1': 0.25277671390271933,
'p': 0.1467318808359271,
'r': 0.9116022099447514},
{'a': 0.6115650534255186,
'f1': 0.7043062200956938,
'p': 0.5546345139412208,
'r': 0.9646133682830931},
{'a': 0.42189712128362433,
'f1': 0.40792653455775735,
'p': 0.2613003095975232,
'r': 0.9295154185022027},
{'a': 0.5567402894135567,
'f1': 0.6641661858049625,
'p': 0.5061565523306948,
'r': 0.9656040268456376},
{'a': 0.40637813211845103,
'f1': 0.38912330051570554,
'p': 0.24924924924924924,
'r': 0.8867521367521367}],
'www.esquire.com;2000': [{'a': 0.9523169912693082,
'f1': 0.9338303821062441,
'p': 0.9243542435424354,
'r': 0.943502824858757},
{'a': 0.9595898673100121,
'f1': 0.9559500328731099,
'p': 0.9453836150845254,
'r': 0.9667553191489362},
{'a': 0.9336633663366337,
'f1': 0.9189842805320435,
'p': 0.9069212410501193,
'r': 0.9313725490196079},
{'a': 0.9518469656992085,
'f1': 0.9205658324265507,
'p': 0.9,
'r': 0.9420935412026726},
{'a': 0.9587529976019185,
'f1': 0.9579667644183774,
'p': 0.9542356377799416,
'r': 0.9617271835132483},
{'a': 0.9554423933800127,
'f1': 0.932301740812379,
'p': 0.9111531190926276,
'r': 0.9544554455445544},
{'a': 0.918918918918919,
'f1': 0.8449438202247191,
'p': 0.8,
'r': 0.8952380952380953},
{'a': 0.9373205741626794,
'f1': 0.9378262933080209,
'p': 0.9303201506591338,
'r': 0.9454545454545454},
{'a': 0.9316189362945646,
'f1': 0.8899341486359361,
'p': 0.8663003663003663,
'r': 0.9148936170212766},
{'a': 0.9454813359528488,
'f1': 0.9482034531031265,
'p': 0.9407407407407408,
'r': 0.955785512699906},
{'a': 0.9405286343612335,
'f1': 0.9114754098360656,
'p': 0.9006479481641468,
'r': 0.922566371681416},
{'a': 0.9360599078341014,
'f1': 0.9105560032232071,
'p': 0.9011164274322169,
'r': 0.9201954397394136},
{'a': 0.9609218436873748,
'f1': 0.9582441113490363,
'p': 0.9501061571125266,
'r': 0.9665226781857451},
{'a': 0.9317794201250711,
'f1': 0.8960138648180244,
'p': 0.8689075630252101,
'r': 0.924865831842576},
{'a': 0.9338624338624338,
'f1': 0.9107780157030693,
'p': 0.8960674157303371,
'r': 0.9259796806966618},
{'a': 0.9457601222307105,
'f1': 0.9122373300370828,
'p': 0.8870192307692307,
'r': 0.9389312977099237},
{'a': 0.9534767383691846,
'f1': 0.9570835256114444,
'p': 0.9513761467889909,
'r': 0.9628597957288765},
{'a': 0.937328202308961,
'f1': 0.9188034188034189,
'p': 0.9110169491525424,
'r': 0.9267241379310345},
{'a': 0.9052750992626205,
'f1': 0.8440709617180205,
'p': 0.7971781305114638,
'r': 0.8968253968253969},
{'a': 0.9100062932662052,
'f1': 0.7604690117252932,
'p': 0.6816816816816816,
'r': 0.8598484848484849},
{'a': 0.9503239740820735,
'f1': 0.9290853031860227,
'p': 0.9149797570850202,
'r': 0.9436325678496869},
{'a': 0.9584045584045584,
'f1': 0.9570335491465568,
'p': 0.9453488372093023,
'r': 0.9690107270560191},
{'a': 0.9335476956055734,
'f1': 0.9107913669064749,
'p': 0.9017094017094017,
'r': 0.9200581395348837},
{'a': 0.1,
'f1': 0.09999999999999999,
'p': 0.05555555555555555,
'r': 0.5},
{'a': 0.9337521416333524,
'f1': 0.8975265017667844,
'p': 0.872852233676976,
'r': 0.9236363636363636}],
'www.esquire.com;2005': [{'a': 0.9537231804795961,
'f1': 0.9483568075117371,
'p': 0.9369202226345084,
'r': 0.9600760456273765},
{'a': 0.9509594882729211,
'f1': 0.9441476444876153,
'p': 0.9274809160305344,
'r': 0.9614243323442137},
{'a': 0.9518129770992366,
'f1': 0.9308692676249145,
'p': 0.9030544488711819,
'r': 0.96045197740113},
{'a': 0.9571167883211679,
'f1': 0.9470720720720721,
'p': 0.9251925192519251,
'r': 0.9700115340253749},
{'a': 0.9543103448275863,
'f1': 0.9475247524752476,
'p': 0.9336585365853659,
'r': 0.9618090452261306},
{'a': 0.9180487804878049,
'f1': 0.8821879382889201,
'p': 0.8243774574049804,
'r': 0.9487179487179487},
{'a': 0.8962640551323903,
'f1': 0.9104571070757671,
'p': 0.8568061284619918,
'r': 0.9712758851035405},
{'a': 0.9136775719353567,
'f1': 0.9010393131495706,
'p': 0.8463497453310697,
'r': 0.9632850241545894},
{'a': 0.9537397319498487,
'f1': 0.9468455042225534,
'p': 0.9306640625,
'r': 0.9635995955510617},
{'a': 0.9086340794883508,
'f1': 0.8802395209580839,
'p': 0.9119106699751861,
'r': 0.8506944444444444},
{'a': 0.9370045890696704,
'f1': 0.9246882793017456,
'p': 0.8913461538461539,
'r': 0.960621761658031},
{'a': 0.9195461578133058,
'f1': 0.8704318936877076,
'p': 0.8061538461538461,
'r': 0.9458483754512635},
{'a': 0.898936170212766,
'f1': 0.8230912476722532,
'p': 0.728171334431631,
'r': 0.9464668094218416},
{'a': 0.9373309287646528,
'f1': 0.9295489102889002,
'p': 0.8999018645731109,
'r': 0.9612159329140462},
{'a': 0.946358482337549,
'f1': 0.9364341085271318,
'p': 0.9254341164453525,
'r': 0.9476987447698745},
{'a': 0.8785743887277249,
'f1': 0.8544461003477396,
'p': 0.7768744354110207,
'r': 0.9492273730684326},
{'a': 0.9572310405643739,
'f1': 0.9495054659031754,
'p': 0.9306122448979591,
'r': 0.9691817215727949},
{'a': 0.9576874738165061,
'f1': 0.9529576152771309,
'p': 0.9419889502762431,
'r': 0.9641847313854854},
{'a': 0.9569296375266525,
'f1': 0.9510421715947648,
'p': 0.9378585086042065,
'r': 0.9646017699115044},
{'a': 0.9632217727105553,
'f1': 0.964513839602555,
'p': 0.9530154277699859,
'r': 0.9762931034482759},
{'a': 0.9084611578426521,
'f1': 0.8626577579806979,
'p': 0.8938461538461538,
'r': 0.8335724533715926},
{'a': 0.9439672801635992,
'f1': 0.9384822631342613,
'p': 0.9118673647469459,
'r': 0.9666975023126735},
{'a': 0.9485014774166315,
'f1': 0.9420702754036088,
'p': 0.9323308270676691,
'r': 0.9520153550863724},
{'a': 0.9511054421768708,
'f1': 0.9446849446849447,
'p': 0.9308056872037914,
'r': 0.958984375},
{'a': 0.9588189588189588,
'f1': 0.9576338928856913,
'p': 0.9507936507936507,
'r': 0.964573268921095}],
'www.esquire.com;2010': [{'a': 0.5707632600258732,
'f1': 0.4548143279658232,
'p': 0.31199278629395855,
'r': 0.8387878787878787},
{'a': 0.6742880064179703,
'f1': 0.7208662770711586,
'p': 0.5935465609963204,
'r': 0.9177242888402626},
{'a': 0.49156391953277095,
'f1': 0.22387320455671122,
'p': 0.13093858632676708,
'r': 0.7713310580204779},
{'a': 0.5876421923474664,
'f1': 0.5550906555090656,
'p': 0.40912828947368424,
'r': 0.8629661751951431},
{'a': 0.5996454798683211,
'f1': 0.5883884405102838,
'p': 0.4396887159533074,
'r': 0.8890637293469709},
{'a': 0.5264499681325685,
'f1': 0.418622848200313,
'p': 0.27835587929240374,
'r': 0.8438485804416404},
{'a': 0.47368421052631576,
'f1': 0.3080357142857143,
'p': 0.18945634266886327,
'r': 0.8233890214797136},
{'a': 0.5769369369369369,
'f1': 0.5536121673003803,
'p': 0.394579945799458,
'r': 0.9273885350318471},
{'a': 0.5820737204985414,
'f1': 0.5504848830576156,
'p': 0.39630390143737165,
'r': 0.9010270774976658},
{'a': 0.6064501051647582,
'f1': 0.6052508204406938,
'p': 0.4672457473760405,
'r': 0.8589487691284099},
{'a': 0.546473482777474,
'f1': 0.46775745909528393,
'p': 0.3263205013428827,
'r': 0.82559456398641},
{'a': 0.5738841142223454,
'f1': 0.5222256760957413,
'p': 0.3721754541426673,
'r': 0.875},
{'a': 0.6255188988420363,
'f1': 0.6473251028806585,
'p': 0.5077469335054874,
'r': 0.8927355278093076},
{'a': 0.513375796178344,
'f1': 0.3154121863799283,
'p': 0.1969781757134863,
'r': 0.7910112359550562},
{'a': 0.6101694915254238,
'f1': 0.6086533787068547,
'p': 0.4623338257016248,
'r': 0.8904694167852063},
{'a': 0.47749025168380005,
'f1': 0.20151679306608886,
'p': 0.1154562383612663,
'r': 0.7914893617021277},
{'a': 0.42452478568766305,
'f1': 0.18479408658922913,
'p': 0.1044776119402985,
'r': 0.7990867579908676},
{'a': 0.5518433179723502,
'f1': 0.49838813668600906,
'p': 0.347103726986978,
'r': 0.8834285714285715},
{'a': 0.5635478637101136,
'f1': 0.5432937181663837,
'p': 0.39603960396039606,
'r': 0.8648648648648649},
{'a': 0.6503097893432466,
'f1': 0.6610617343262071,
'p': 0.5278097429996165,
'r': 0.884318766066838},
{'a': 0.4560916348490108,
'f1': 0.27688047992616516,
'p': 0.1662971175166297,
'r': 0.8264462809917356},
{'a': 0.4507603186097031,
'f1': 0.2689156626506024,
'p': 0.16080691642651296,
'r': 0.8205882352941176},
{'a': 0.6857645678529527,
'f1': 0.7457680746717291,
'p': 0.623875066172578,
'r': 0.9268580416830515},
{'a': 0.60888671875,
'f1': 0.61323032351521,
'p': 0.4643510054844607,
'r': 0.9026297085998578},
{'a': 0.617309390638799,
'f1': 0.6264367816091954,
'p': 0.49254405784003613,
'r': 0.8602999210734017}],
'www.esquire.com;2015': [{'a': 0.27295273525063196,
'f1': 0.013716356107660454,
'p': 0.0069312757470738244,
'r': 0.6503067484662577},
{'a': 0.27782218251915936,
'f1': 0.03298917149332661,
'p': 0.01688688366097325,
'r': 0.7100271002710027},
{'a': 0.270252355475593,
'f1': 0.014324998401227858,
'p': 0.007236544549977386,
'r': 0.7},
{'a': 0.44490750152470016,
'f1': 0.07862324953602161,
'p': 0.041473834104663584,
'r': 0.7540453074433657},
{'a': 0.3052035589107576,
'f1': 0.12948992230604664,
'p': 0.07025045815516189,
'r': 0.8261494252873564},
{'a': 0.3222323799573375,
'f1': 0.1877184744613137,
'p': 0.10510019279079277,
'r': 0.8775609756097561},
{'a': 0.2809550249861188,
'f1': 0.06057308668842945,
'p': 0.03149160852347728,
'r': 0.7914691943127962},
{'a': 0.2777961536644143,
'f1': 0.03155493600304145,
'p': 0.016132167152575316,
'r': 0.7175792507204611},
{'a': 0.3703675167578396,
'f1': 0.17320922703358968,
'p': 0.09621220636169496,
'r': 0.867274569402229},
{'a': 0.4766349363332921,
'f1': 0.5339095012661016,
'p': 0.37296673716592965,
'r': 0.9391885349084923},
{'a': 0.2828363092455829,
'f1': 0.050498174391979705,
'p': 0.026110328938947908,
'r': 0.7654784240150094},
{'a': 0.3812192351382052,
'f1': 0.21030250314100704,
'p': 0.11967880321196787,
'r': 0.8662420382165605},
{'a': 0.2810142006842574,
'f1': 0.04494801718234452,
'p': 0.02316627093627671,
'r': 0.7520833333333333},
{'a': 0.3023045639403525,
'f1': 0.11376420617609918,
'p': 0.06114641821435182,
'r': 0.8156378600823045},
{'a': 0.29364024026777935,
'f1': 0.08221626452189454,
'p': 0.0433580495161493,
'r': 0.7921928817451206},
{'a': 0.3496291570300662,
'f1': 0.0953965612867443,
'p': 0.05066572404854483,
'r': 0.8143939393939394},
{'a': 0.28344277673545965,
'f1': 0.05247162438752093,
'p': 0.02712581762216237,
'r': 0.7996219281663516},
{'a': 0.27830433347479605,
'f1': 0.024351373748964115,
'p': 0.012379285760580725,
'r': 0.7403100775193798},
{'a': 0.34906643160509654,
'f1': 0.09128537867770443,
'p': 0.04831261101243339,
'r': 0.8259109311740891},
{'a': 0.3139565561516138,
'f1': 0.16387995712754552,
'p': 0.09041986989946776,
'r': 0.8737142857142857},
{'a': 0.27785878921868384,
'f1': 0.02303678248695431,
'p': 0.011709904897457463,
'r': 0.7042801556420234},
{'a': 0.41080617495711835,
'f1': 0.3030098072370646,
'p': 0.18252189855367693,
'r': 0.891542288557214},
{'a': 0.32447016841463944,
'f1': 0.18287097962836238,
'p': 0.1024355723300112,
'r': 0.8514705882352941},
{'a': 0.3203595427175146,
'f1': 0.17140121289498883,
'p': 0.09539317858834676,
'r': 0.8434554973821989},
{'a': 0.34118318122555413,
'f1': 0.058241118229470014,
'p': 0.030204180258547784,
'r': 0.8116883116883117},
{'a': 0.2778430175207854,
'f1': 0.03368950345694532,
'p': 0.01725025746652935,
'r': 0.7165775401069518}],
'www.forbes.com;2000': [{'a': 0.7226666666666667,
'f1': 0.7555816686251468,
'p': 0.6218568665377177,
'r': 0.9625748502994012},
{'a': 0.7088150289017341,
'f1': 0.7286195286195286,
'p': 0.5880434782608696,
'r': 0.9575221238938053},
{'a': 0.8641169154228856,
'f1': 0.914263292132627,
'p': 0.8623242042931162,
'r': 0.9728601252609603},
{'a': 0.7454672245467224,
'f1': 0.7767584097859327,
'p': 0.6526207605344296,
'r': 0.959214501510574},
{'a': 0.6517626827171109,
'f1': 0.4993819530284302,
'p': 0.346483704974271,
'r': 0.8938053097345132},
{'a': 0.717577353522054,
'f1': 0.7385740402193783,
'p': 0.6041874376869392,
'r': 0.9498432601880877},
{'a': 0.7900641025641025,
'f1': 0.8412121212121212,
'p': 0.7565406976744186,
'r': 0.9472247497725205},
{'a': 0.6537890044576523,
'f1': 0.6277955271565495,
'p': 0.49125,
'r': 0.8694690265486725},
{'a': 0.8040330920372285,
'f1': 0.8549559892843476,
'p': 0.7629781420765027,
'r': 0.9721496953872933},
{'a': 0.9619460500963392,
'f1': 0.9751494180559925,
'p': 0.9657320872274143,
'r': 0.9847522236340533},
{'a': 0.6940350877192982,
'f1': 0.6993103448275861,
'p': 0.5602209944751381,
'r': 0.9302752293577982},
{'a': 0.8702662167229096,
'f1': 0.9161415414444983,
'p': 0.8626198083067093,
'r': 0.9767441860465116},
{'a': 0.6599839615076183,
'f1': 0.6430976430976431,
'p': 0.48538754764930114,
'r': 0.9526184538653366},
{'a': 0.6732312180889861,
'f1': 0.6600910470409711,
'p': 0.5190930787589498,
'r': 0.90625},
{'a': 0.602502406159769,
'f1': 0.44712182061579653,
'p': 0.30474452554744524,
'r': 0.8391959798994975},
{'a': 0.7344262295081967,
'f1': 0.7788898999090083,
'p': 0.6892109500805152,
'r': 0.895397489539749},
{'a': 0.7785825142265907,
'f1': 0.8304278922345483,
'p': 0.7364722417427969,
'r': 0.9518619436875567},
{'a': 0.7634961439588689,
'f1': 0.8130081300813008,
'p': 0.7183908045977011,
'r': 0.9363295880149812},
{'a': 0.7300177619893428,
'f1': 0.7738095238095237,
'p': 0.64891846921797,
'r': 0.9582309582309583},
{'a': 0.5676077265973254,
'f1': 0.28325123152709364,
'p': 0.17062314540059348,
'r': 0.8333333333333334},
{'a': 0.9127272727272727,
'f1': 0.84,
'p': 0.7777777777777778,
'r': 0.9130434782608695},
{'a': 0.6705118961788031,
'f1': 0.6612305411415863,
'p': 0.5204200700116686,
'r': 0.9065040650406504},
{'a': 0.7873270211216314,
'f1': 0.8172715894868585,
'p': 0.7183718371837183,
'r': 0.9477503628447025},
{'a': 0.7077562326869806,
'f1': 0.7291399229781772,
'p': 0.5916666666666667,
'r': 0.9498327759197325},
{'a': 0.8645381277123373,
'f1': 0.9144981412639405,
'p': 0.8598233995584988,
'r': 0.9765984120351023},
{'a': 0.7099596231493943,
'f1': 0.7398913699456848,
'p': 0.6339193381592554,
'r': 0.8884057971014493},
{'a': 0.6931254429482636,
'f1': 0.687364620938628,
'p': 0.5402951191827469,
'r': 0.9444444444444444},
{'a': 0.9101063829787234,
'f1': 0.9345215032932973,
'p': 0.919908466819222,
'r': 0.9496062992125984}],
'www.forbes.com;2005': [{'a': 0.7949358059914408,
'f1': 0.34285714285714286,
'p': 0.22156573116691286,
'r': 0.7575757575757576},
{'a': 0.8146574614976102,
'f1': 0.7458120903131829,
'p': 0.6436203645505971,
'r': 0.8865800865800866},
{'a': 0.816696269982238,
'f1': 0.5095057034220533,
'p': 0.37119113573407203,
'r': 0.8121212121212121},
{'a': 0.7801600914808462,
'f1': 0.5652911249293386,
'p': 0.42771599657827203,
'r': 0.8333333333333334},
{'a': 0.8255597014925373,
'f1': 0.6721215663354763,
'p': 0.5348837209302325,
'r': 0.9040880503144654},
{'a': 0.8369250562881956,
'f1': 0.694394213381555,
'p': 0.5597667638483965,
'r': 0.9142857142857143},
{'a': 0.7965587044534413,
'f1': 0.44321329639889195,
'p': 0.31007751937984496,
'r': 0.7766990291262136},
{'a': 0.8190059393560488,
'f1': 0.6356198867212084,
'p': 0.5055055055055055,
'r': 0.8559322033898306},
{'a': 0.8499008779382611,
'f1': 0.7831423895253682,
'p': 0.673469387755102,
'r': 0.9354838709677419},
{'a': 0.7619047619047619,
'f1': 0.02457757296466974,
'p': 0.013769363166953529,
'r': 0.11428571428571428},
{'a': 0.8148030495552732,
'f1': 0.4233432245301681,
'p': 0.30440967283072545,
'r': 0.6948051948051948},
{'a': 0.8180186647523331,
'f1': 0.4657534246575343,
'p': 0.32838038632986627,
'r': 0.8007246376811594},
{'a': 0.8113421550094518,
'f1': 0.33731739707835323,
'p': 0.21972318339100347,
'r': 0.7257142857142858},
{'a': 0.8060967944688875,
'f1': 0.6644915715062534,
'p': 0.5303819444444444,
'r': 0.8893740902474527},
{'a': 0.817174515235457,
'f1': 0.664406779661017,
'p': 0.5374771480804388,
'r': 0.8698224852071006},
{'a': 0.810659709280656,
'f1': 0.4051522248243559,
'p': 0.2768,
'r': 0.7554585152838428},
{'a': 0.8315127441244621,
'f1': 0.6924471299093655,
'p': 0.5601173020527859,
'r': 0.9066455696202531},
{'a': 0.8406998158379374,
'f1': 0.7474452554744526,
'p': 0.634186622625929,
'r': 0.909952606635071},
{'a': 0.8173076923076923,
'f1': 0.6984126984126984,
'p': 0.5922406967537609,
'r': 0.850967007963595},
{'a': 0.8394004282655246,
'f1': 0.7223691168693812,
'p': 0.6044247787610619,
'r': 0.897503285151117},
{'a': 0.7712240868706811,
'f1': 0.6457776079480322,
'p': 0.5248447204968945,
'r': 0.8391261171797418},
{'a': 0.7968895800933126,
'f1': 0.5455810716771051,
'p': 0.4365256124721604,
'r': 0.7272727272727273},
{'a': 0.8141809290953546,
'f1': 0.6452742123687281,
'p': 0.5177902621722846,
'r': 0.8560371517027864},
{'a': 0.8471502590673575,
'f1': 0.8021462105969149,
'p': 0.7170263788968825,
'r': 0.9101978691019786},
{'a': 0.8044752264251465,
'f1': 0.6994266994266994,
'p': 0.5918225918225918,
'r': 0.8548548548548549}],
'www.forbes.com;2010': [{'a': 0.7915611814345992,
'f1': 0.5259117082533589,
'p': 0.40058479532163743,
'r': 0.7653631284916201},
{'a': 0.8416464891041162,
'f1': 0.8436154949784792,
'p': 0.7903225806451613,
'r': 0.9046153846153846},
{'a': 0.8192612137203166,
'f1': 0.7467652495378928,
'p': 0.6464,
'r': 0.8840262582056893},
{'a': 0.8205298013245033,
'f1': 0.7673819742489271,
'p': 0.6641901931649331,
'r': 0.9085365853658537},
{'a': 0.8626182180189149,
'f1': 0.8696883852691218,
'p': 0.8143236074270557,
'r': 0.9331306990881459},
{'a': 0.8248877485567672,
'f1': 0.781074578989575,
'p': 0.6849507735583685,
'r': 0.9085820895522388},
{'a': 0.7814009661835749,
'f1': 0.6852173913043478,
'p': 0.5620542082738944,
'r': 0.8775055679287305},
{'a': 0.8283957518166574,
'f1': 0.8151715833835038,
'p': 0.7148891235480465,
'r': 0.9481792717086834},
{'a': 0.8286052009456265,
'f1': 0.7991689750692521,
'p': 0.7185554171855542,
'r': 0.9001560062402496},
{'a': 0.8015448603683898,
'f1': 0.7216666666666667,
'p': 0.6022253129346314,
'r': 0.9002079002079002},
{'a': 0.8589672404219878,
'f1': 0.8504122497055359,
'p': 0.7780172413793104,
'r': 0.9376623376623376},
{'a': 0.8394276629570747,
'f1': 0.8359501894964808,
'p': 0.7766599597585513,
'r': 0.9050410316529894},
{'a': 0.8221933809214795,
'f1': 0.7720465890183028,
'p': 0.6666666666666666,
'r': 0.9169960474308301},
{'a': 0.8209127671865973,
'f1': 0.7930574098798399,
'p': 0.72,
'r': 0.8826151560178306},
{'a': 0.7995169082125604,
'f1': 0.7511244377811094,
'p': 0.6382165605095541,
'r': 0.912568306010929},
{'a': 0.8140936485859991,
'f1': 0.8107597923548844,
'p': 0.7212426532325776,
'r': 0.9256465517241379},
{'a': 0.8165137614678899,
'f1': 0.7513321492007106,
'p': 0.6619718309859155,
'r': 0.8685831622176592},
{'a': 0.832996632996633,
'f1': 0.7761732851985559,
'p': 0.7037643207855974,
'r': 0.8651911468812877},
{'a': 0.7807270628967109,
'f1': 0.6960000000000001,
'p': 0.5634715025906736,
'r': 0.9100418410041841},
{'a': 0.7876754118364857,
'f1': 0.7314814814814815,
'p': 0.6030534351145038,
'r': 0.9294117647058824},
{'a': 0.8518041237113402,
'f1': 0.8200312989045384,
'p': 0.7380281690140845,
'r': 0.9225352112676056},
{'a': 0.5521945432977462,
'f1': 0.1837837837837838,
'p': 0.10429447852760736,
'r': 0.7727272727272727},
{'a': 0.6136631330977621,
'f1': 0.3568627450980393,
'p': 0.22303921568627452,
'r': 0.8921568627450981},
{'a': 0.7965571205007824,
'f1': 0.7861842105263159,
'p': 0.6815589353612167,
'r': 0.9287564766839378},
{'a': 0.8521351179094965,
'f1': 0.8237082066869301,
'p': 0.7374149659863946,
'r': 0.9328743545611016}],
'www.forbes.com;2015': [{'a': 0.5209003215434084,
'f1': 0.5578635014836795,
'p': 0.397583081570997,
'r': 0.9346590909090909},
{'a': 0.4192546583850932,
'f1': 0.39612486544671693,
'p': 0.252400548696845,
'r': 0.92},
{'a': 0.4173040152963671,
'f1': 0.3827848101265823,
'p': 0.2412252712188896,
'r': 0.9264705882352942},
{'a': 0.4401840490797546,
'f1': 0.43234836702954904,
'p': 0.28118678354686444,
'r': 0.9349775784753364},
{'a': 0.4429841515268651,
'f1': 0.46251398731816484,
'p': 0.30587074494326594,
'r': 0.9480122324159022},
{'a': 0.4754521963824289,
'f1': 0.5120192307692307,
'p': 0.35013698630136986,
'r': 0.9523099850968704},
{'a': 0.5335570469798657,
'f1': 0.5962236746550472,
'p': 0.4364699627857523,
'r': 0.9404352806414662},
{'a': 0.45643294758339004,
'f1': 0.49478013286934514,
'p': 0.33706896551724136,
'r': 0.929845422116528},
{'a': 0.3932432432432432,
'f1': 0.3812586127698667,
'p': 0.24002313475997686,
'r': 0.9263392857142857},
{'a': 0.2619603267211202,
'f1': 0.2291285801340646,
'p': 0.13146853146853146,
'r': 0.8909952606635071},
{'a': 0.38078703703703703,
'f1': 0.27309782608695654,
'p': 0.16170555108608206,
'r': 0.8777292576419214},
{'a': 0.5161775771256584,
'f1': 0.5808344198174706,
'p': 0.4206798866855524,
'r': 0.9378947368421052},
{'a': 0.5187677053824362,
'f1': 0.5883065737655256,
'p': 0.43640449438202245,
'r': 0.9024163568773235},
{'a': 0.5134364820846905,
'f1': 0.5693693693693694,
'p': 0.4101765316718588,
'r': 0.9305064782096584},
{'a': 0.44166666666666665,
'f1': 0.45667947076397786,
'p': 0.30415008527572485,
'r': 0.916095890410959},
{'a': 0.4437314906219151,
'f1': 0.4350877192982456,
'p': 0.2823682498373455,
'r': 0.9475982532751092},
{'a': 0.4184162062615101,
'f1': 0.42266910420475323,
'p': 0.27432368296155674,
'r': 0.9203821656050956},
{'a': 0.3955431754874652,
'f1': 0.3163201008191557,
'p': 0.19145690312738367,
'r': 0.9094202898550725},
{'a': 0.42181340341655715,
'f1': 0.43250214961306965,
'p': 0.28386004514672686,
'r': 0.907942238267148},
{'a': 0.5091269841269841,
'f1': 0.5639760310186818,
'p': 0.4065040650406504,
'r': 0.9205983889528193},
{'a': 0.33960396039603963,
'f1': 0.2377142857142857,
'p': 0.13747521480502314,
'r': 0.8776371308016878},
{'a': 0.47776726584673607,
'f1': 0.495890410958904,
'p': 0.33622291021671824,
'r': 0.9443478260869566},
{'a': 0.25967894239848915,
'f1': 0.08624708624708625,
'p': 0.04559457794208256,
'r': 0.7956989247311828},
{'a': 0.5058731401722788,
'f1': 0.5663230240549828,
'p': 0.4057114721811915,
'r': 0.9374288964732651},
{'a': 0.45976470588235296,
'f1': 0.4796010879419764,
'p': 0.32334963325183375,
'r': 0.9280701754385965}],
'www.foxnews.com;2000': [{'a': 0.9319298245614035,
'f1': 0.8921023359288098,
'p': 0.8336798336798337,
'r': 0.9593301435406698},
{'a': 0.9034051424600417,
'f1': 0.7590987868284229,
'p': 0.6479289940828402,
'r': 0.9163179916317992},
{'a': 0.9359477124183007,
'f1': 0.9118705035971222,
'p': 0.8681506849315068,
'r': 0.9602272727272727},
{'a': 0.8822222222222222,
'f1': 0.866274179983179,
'p': 0.8135860979462876,
'r': 0.9262589928057554},
{'a': 0.9245283018867925,
'f1': 0.8461538461538461,
'p': 0.7601626016260162,
'r': 0.9540816326530612},
{'a': 0.9237708615245828,
'f1': 0.9404719971821065,
'p': 0.9050847457627119,
'r': 0.9787390029325513},
{'a': 0.8167979002624672,
'f1': 0.8167979002624672,
'p': 0.7157313707451702,
'r': 0.9511002444987775},
{'a': 0.9257178526841449,
'f1': 0.9025389025389026,
'p': 0.8425076452599388,
'r': 0.9717813051146384},
{'a': 0.9233261339092873,
'f1': 0.9374449339207048,
'p': 0.9027149321266968,
'r': 0.9749541844838119},
{'a': 0.8448275862068966,
'f1': 0.7559572719802793,
'p': 0.6488011283497884,
'r': 0.905511811023622},
{'a': 0.9342214820982515,
'f1': 0.8336842105263158,
'p': 0.7586206896551724,
'r': 0.9252336448598131},
{'a': 0.8735244519392917,
'f1': 0.8278500382555469,
'p': 0.7421124828532236,
'r': 0.9359861591695502},
{'a': 0.9365079365079365,
'f1': 0.8823529411764706,
'p': 0.8311345646437994,
'r': 0.9402985074626866},
{'a': 0.9422535211267605,
'f1': 0.9098901098901099,
'p': 0.8589211618257261,
'r': 0.9672897196261683},
{'a': 0.2631578947368421,
'f1': 0.125,
'p': 0.07692307692307693,
'r': 0.3333333333333333},
{'a': 0.8852844381758345,
'f1': 0.8790882061446976,
'p': 0.8049001814882033,
'r': 0.9683406113537117},
{'a': 0.8974201474201474,
'f1': 0.8913467794404685,
'p': 0.845679012345679,
'r': 0.9422283356258597},
{'a': 0.906423034330011,
'f1': 0.8926984126984127,
'p': 0.8754669987546699,
'r': 0.9106217616580311},
{'a': 0.9460406447091801,
'f1': 0.9171151776103336,
'p': 0.8676171079429735,
'r': 0.9726027397260274},
{'a': 0.9829737368167092,
'f1': 0.9908365794843257,
'p': 0.9932316846411305,
'r': 0.9884529977794226},
{'a': 0.9434759640781828,
'f1': 0.9413698630136986,
'p': 0.9206859592711683,
'r': 0.9630044843049327},
{'a': 0.9032527105921602,
'f1': 0.9236842105263158,
'p': 0.8886075949367088,
'r': 0.9616438356164384},
{'a': 0.9553376906318083,
'f1': 0.9670152855993565,
'p': 0.9570063694267515,
'r': 0.9772357723577236},
{'a': 0.9263233190271817,
'f1': 0.8772348033373063,
'p': 0.8070175438596491,
'r': 0.9608355091383812},
{'a': 0.9519125683060109,
'f1': 0.9377652050919378,
'p': 0.9298737727910238,
'r': 0.9457917261055635}],
'www.foxnews.com;2005': [{'a': 0.5862177470106985,
'f1': 0.6531258243207597,
'p': 0.49068569163694015,
'r': 0.9763406940063092},
{'a': 0.4221576227390181,
'f1': 0.44111215245235863,
'p': 0.28687525396180413,
'r': 0.9540540540540541},
{'a': 0.4481697939095663,
'f1': 0.4868421052631578,
'p': 0.32642884541618716,
'r': 0.9572553430821147},
{'a': 0.45678233438485805,
'f1': 0.49531066822977726,
'p': 0.33625149224035017,
'r': 0.9399332591768632},
{'a': 0.3966655088572421,
'f1': 0.3776424220709423,
'p': 0.23632286995515694,
'r': 0.9393939393939394},
{'a': 0.3468552288797916,
'f1': 0.25159914712153514,
'p': 0.14603960396039603,
'r': 0.9076923076923077},
{'a': 0.541019955654102,
'f1': 0.6026871401151631,
'p': 0.43715194908512334,
'r': 0.969991173874669},
{'a': 0.5936728741305811,
'f1': 0.3694289693593315,
'p': 0.2370949720670391,
'r': 0.8360914105594957},
{'a': 0.710692588092345,
'f1': 0.8063440422936152,
'p': 0.7160190668785209,
'r': 0.922747580044676},
{'a': 0.502289538569919,
'f1': 0.28672387682988393,
'p': 0.16975493126120741,
'r': 0.922077922077922},
{'a': 0.5193734542456719,
'f1': 0.5294592413236482,
'p': 0.3664804469273743,
'r': 0.9534883720930233},
{'a': 0.4351763584366063,
'f1': 0.33613445378151263,
'p': 0.2051983584131327,
'r': 0.9287925696594427},
{'a': 0.4521299029945171,
'f1': 0.4119511090991399,
'p': 0.26361529548088064,
'r': 0.9420289855072463},
{'a': 0.37771161207996595,
'f1': 0.2796651895617922,
'p': 0.16502033701336433,
'r': 0.9161290322580645},
{'a': 0.45897516504243946,
'f1': 0.4983969688137569,
'p': 0.3378111418411695,
'r': 0.95},
{'a': 0.4394703656998739,
'f1': 0.4708333333333333,
'p': 0.31314330958036424,
'r': 0.9484412470023981},
{'a': 0.4479101684341859,
'f1': 0.4863609982588508,
'p': 0.324680356450988,
'r': 0.96878612716763},
{'a': 0.6753774680603949,
'f1': 0.7351129363449693,
'p': 0.5918107833163785,
'r': 0.9699874947894956},
{'a': 0.3812305295950156,
'f1': 0.31538130116329166,
'p': 0.1893429901707191,
'r': 0.9432989690721649},
{'a': 0.38862698880377133,
'f1': 0.40731219651528133,
'p': 0.2590843023255814,
'r': 0.951935914552737},
{'a': 0.6396255850234009,
'f1': 0.6721544138518308,
'p': 0.531895777178796,
'r': 0.912875867386276},
{'a': 0.438740157480315,
'f1': 0.4721563981042654,
'p': 0.31415057154119036,
'r': 0.9499404052443385},
{'a': 0.3756345177664975,
'f1': 0.3840942562592047,
'p': 0.24283054003724394,
'r': 0.9183098591549296},
{'a': 0.49910340705319783,
'f1': 0.4939613526570048,
'p': 0.33524590163934426,
'r': 0.9380733944954128},
{'a': 0.44757681343047195,
'f1': 0.4837181764357608,
'p': 0.3224151539068666,
'r': 0.9680094786729858}],
'www.foxnews.com;2010': [{'a': 0.6641459074733096,
'f1': 0.3028624192059095,
'p': 0.18636363636363637,
'r': 0.8078817733990148},
{'a': 0.613623516110797,
'f1': 0.5718759787034137,
'p': 0.4163246694026448,
'r': 0.913},
{'a': 0.25618698441796517,
'f1': 0.17362525458248473,
'p': 0.09600225225225226,
'r': 0.9069148936170213},
{'a': 0.665318957771788,
'f1': 0.26310583580613256,
'p': 0.1581450653983353,
'r': 0.7823529411764706},
{'a': 0.6616678858814923,
'f1': 0.49425915800984144,
'p': 0.3450381679389313,
'r': 0.8709055876685935},
{'a': 0.6058584686774942,
'f1': 0.5516331243813922,
'p': 0.3954588457899716,
'r': 0.9116684841875682},
{'a': 0.5616740088105727,
'f1': 0.427455815865187,
'p': 0.2813852813852814,
'r': 0.8888888888888888},
{'a': 0.5795739348370927,
'f1': 0.4708201892744479,
'p': 0.32183288409703503,
'r': 0.8766519823788547},
{'a': 0.6489323199420919,
'f1': 0.4640883977900552,
'p': 0.3157894736842105,
'r': 0.875},
{'a': 0.5876229034123771,
'f1': 0.5166101694915254,
'p': 0.36182336182336183,
'r': 0.9028436018957346},
{'a': 0.5419508867667121,
'f1': 0.3301745635910225,
'p': 0.20482673267326731,
'r': 0.8508997429305912},
{'a': 0.5361552028218695,
'f1': 0.2864894194248508,
'p': 0.17334208798424164,
'r': 0.825},
{'a': 0.6565064478311841,
'f1': 0.41046277665995984,
'p': 0.2749326145552561,
'r': 0.8095238095238095},
{'a': 0.7466003316749585,
'f1': 0.7054741711642253,
'p': 0.5672659640421575,
'r': 0.9327217125382263},
{'a': 0.7,
'f1': 0.6019417475728156,
'p': 0.44742268041237115,
'r': 0.9194915254237288},
{'a': 0.6822820037105751,
'f1': 0.29887410440122825,
'p': 0.18204488778054864,
'r': 0.8342857142857143},
{'a': 0.5770521818736649,
'f1': 0.471395881006865,
'p': 0.32053941908713696,
'r': 0.8904899135446686},
{'a': 0.7655028660760813,
'f1': 0.7791952894995093,
'p': 0.6597424179476526,
'r': 0.951467944877172},
{'a': 0.6633318834275772,
'f1': 0.3101604278074866,
'p': 0.19376391982182628,
'r': 0.7767857142857143},
{'a': 0.7121501272264631,
'f1': 0.6596464836404664,
'p': 0.5122663551401869,
'r': 0.9260823653643083},
{'a': 0.6885584469227591,
'f1': 0.4512372634643377,
'p': 0.3045186640471513,
'r': 0.8707865168539326},
{'a': 0.5496666666666666,
'f1': 0.3683964469378214,
'p': 0.23368920521945433,
'r': 0.869757174392936},
{'a': 0.7324405575293796,
'f1': 0.7325867249385413,
'p': 0.6029676258992805,
'r': 0.9331941544885177},
{'a': 0.5312071330589849,
'f1': 0.306443429731101,
'p': 0.18676561533704392,
'r': 0.8531073446327684},
{'a': 0.7248960190136661,
'f1': 0.6979778212654926,
'p': 0.5643459915611815,
'r': 0.9145299145299145}],
'www.foxnews.com;2015': [{'a': 0.7937106918238994,
'f1': 0.8481481481481481,
'p': 0.7614297589359933,
'r': 0.9571577847439916},
{'a': 0.40814299900695133,
'f1': 0.48173913043478267,
'p': 0.32284382284382285,
'r': 0.9486301369863014},
{'a': 0.6931067044381491,
'f1': 0.48330683624801274,
'p': 0.33480176211453744,
'r': 0.8685714285714285},
{'a': 0.6524390243902439,
'f1': 0.564885496183206,
'p': 0.41965973534971646,
'r': 0.8638132295719845},
{'a': 0.7480438184663537,
'f1': 0.701851851851852,
'p': 0.5565345080763583,
'r': 0.949874686716792},
{'a': 0.6549210206561361,
'f1': 0.5186440677966102,
'p': 0.36428571428571427,
'r': 0.9},
{'a': 0.6789587852494577,
'f1': 0.6145833333333334,
'p': 0.4609375,
'r': 0.921875},
{'a': 0.71280276816609,
'f1': 0.5970873786407768,
'p': 0.45137614678899085,
'r': 0.8817204301075269},
{'a': 0.7400130975769482,
'f1': 0.7871313672922252,
'p': 0.6697080291970803,
'r': 0.9544863459037711},
{'a': 0.696058091286307,
'f1': 0.3933747412008281,
'p': 0.25815217391304346,
'r': 0.8260869565217391},
{'a': 0.676056338028169,
'f1': 0.6672694394213382,
'p': 0.5234042553191489,
'r': 0.9201995012468828},
{'a': 0.6980440097799511,
'f1': 0.6466380543633763,
'p': 0.49130434782608695,
'r': 0.9456066945606695},
{'a': 0.7577120822622108,
'f1': 0.7606349206349207,
'p': 0.6338624338624339,
'r': 0.9507936507936507},
{'a': 0.7275042444821732,
'f1': 0.7429943955164131,
'p': 0.6178428761651131,
'r': 0.9317269076305221},
{'a': 0.8031189083820662,
'f1': 0.8543969245555022,
'p': 0.7657192075796727,
'r': 0.966304347826087},
{'a': 0.7891752577319587,
'f1': 0.8456021140052851,
'p': 0.7619047619047619,
'r': 0.9499575911789653},
{'a': 0.633112582781457,
'f1': 0.4954462659380692,
'p': 0.35509138381201044,
'r': 0.8192771084337349},
{'a': 0.8115593016255268,
'f1': 0.8646779074794638,
'p': 0.7836990595611285,
'r': 0.9643201542912246},
{'a': 0.6971726190476191,
'f1': 0.7233174711080897,
'p': 0.5891472868217055,
'r': 0.9366197183098591},
{'a': 0.7666422823701536,
'f1': 0.8155002891844997,
'p': 0.7042957042957043,
'r': 0.9684065934065934},
{'a': 0.5332918481642813,
'f1': 0.6543778801843317,
'p': 0.49511854951185497,
'r': 0.9646739130434783},
{'a': 0.7387152777777778,
'f1': 0.7554833468724614,
'p': 0.6275303643724697,
'r': 0.9489795918367347},
{'a': 0.6707089552238806,
'f1': 0.6452261306532663,
'p': 0.49767441860465117,
'r': 0.9171428571428571},
{'a': 0.8142777467930842,
'f1': 0.8415040456925273,
'p': 0.7485182049110923,
'r': 0.9608695652173913},
{'a': 0.6452599388379205,
'f1': 0.58472553699284,
'p': 0.4268292682926829,
'r': 0.928030303030303},
{'a': 0.7916921197312157,
'f1': 0.8081035453010692,
'p': 0.6991236611489776,
'r': 0.9573333333333334}],
'www.latimes.com;2000': [{'a': 0.8618266978922716,
'f1': 0.891444342226311,
'p': 0.8331900257953568,
'r': 0.9584569732937686},
{'a': 0.8854328667703474,
'f1': 0.9160015203344737,
'p': 0.8694083694083694,
'r': 0.9678714859437751},
{'a': 0.7727748691099476,
'f1': 0.6813509544787079,
'p': 0.5407925407925408,
'r': 0.9206349206349206},
{'a': 0.8372573316811235,
'f1': 0.8828078524687687,
'p': 0.8069603045133225,
'r': 0.9743926460932371},
{'a': 0.8919825561891983,
'f1': 0.9302123970524491,
'p': 0.8849484536082475,
'r': 0.9803563270899954},
{'a': 0.8368794326241135,
'f1': 0.8732513777024163,
'p': 0.7935285053929122,
'r': 0.9707822808671065},
{'a': 0.8281622911694511,
'f1': 0.8665018541409146,
'p': 0.790304396843292,
'r': 0.9589603283173734},
{'a': 0.8438381937911571,
'f1': 0.8424050632911391,
'p': 0.7541076487252124,
'r': 0.9541218637992831},
{'a': 0.8832468495181616,
'f1': 0.8970924534465862,
'p': 0.841814837522992,
'r': 0.9601398601398602},
{'a': 0.8305252725470763,
'f1': 0.7942238267148015,
'p': 0.6947368421052632,
'r': 0.9269662921348315},
{'a': 0.943357783211084,
'f1': 0.9237520570488207,
'p': 0.9202185792349726,
'r': 0.9273127753303965},
{'a': 0.8780185758513932,
'f1': 0.9221343873517788,
'p': 0.8679315476190477,
'r': 0.9835581787521079},
{'a': 0.8142548596112311,
'f1': 0.8371212121212122,
'p': 0.7620689655172413,
'r': 0.9285714285714286},
{'a': 0.8528121260470682,
'f1': 0.898598516075845,
'p': 0.8414822439526506,
'r': 0.9640330188679245},
{'a': 0.8583333333333333,
'f1': 0.8960805465659835,
'p': 0.836241610738255,
'r': 0.965143299767622},
{'a': 0.876278118609407,
'f1': 0.9090225563909775,
'p': 0.8550212164073551,
'r': 0.9703049759229535},
{'a': 0.8596787827557059,
'f1': 0.9108804581245526,
'p': 0.855749831876261,
'r': 0.9736036725325172},
{'a': 0.8466479325572059,
'f1': 0.8820987654320989,
'p': 0.8114707552526973,
'r': 0.9661933739012847},
{'a': 0.8487243981315128,
'f1': 0.8990165507315904,
'p': 0.8381037567084079,
'r': 0.9694774961200207},
{'a': 0.8532110091743119,
'f1': 0.8987898789878989,
'p': 0.8409675759135358,
'r': 0.965150620200827},
{'a': 0.8590122414520895,
'f1': 0.901591043017089,
'p': 0.8443708609271523,
'r': 0.9671302149178256},
{'a': 0.8336614173228346,
'f1': 0.874536005939124,
'p': 0.7959459459459459,
'r': 0.9703459637561779},
{'a': 0.8046272493573264,
'f1': 0.7915904936014624,
'p': 0.685126582278481,
'r': 0.9372294372294372},
{'a': 0.859375,
'f1': 0.8992094861660078,
'p': 0.8353733170134638,
'r': 0.9736091298145506},
{'a': 0.8827197149643705,
'f1': 0.9253166950274153,
'p': 0.8776901004304161,
'r': 0.9784086365453818}],
'www.latimes.com;2005': [{'a': 0.7525014714537963,
'f1': 0.7354513998112615,
'p': 0.6234666666666666,
'r': 0.8964723926380368},
{'a': 0.5781155015197569,
'f1': 0.48970588235294116,
'p': 0.33467336683417087,
'r': 0.9123287671232877},
{'a': 0.7318357318357318,
'f1': 0.7159851301115241,
'p': 0.5804701627486437,
'r': 0.9340446168768186},
{'a': 0.7339864355689525,
'f1': 0.7038590604026845,
'p': 0.557845744680851,
'r': 0.9534090909090909},
{'a': 0.7616312464101092,
'f1': 0.7050461975835111,
'p': 0.5727482678983834,
'r': 0.9168207024029574},
{'a': 0.8628613787991104,
'f1': 0.8975069252077562,
'p': 0.8871851040525739,
'r': 0.9080717488789237},
{'a': 0.8317972350230415,
'f1': 0.7351874244256348,
'p': 0.6268041237113402,
'r': 0.8888888888888888},
{'a': 0.6709758131776481,
'f1': 0.7032719067318541,
'p': 0.5598802395209581,
'r': 0.9453993933265925},
{'a': 0.7899941826643397,
'f1': 0.8211100099108027,
'p': 0.7157667386609071,
'r': 0.9628123184195235},
{'a': 0.7357654278895329,
'f1': 0.7982296277011196,
'p': 0.6865203761755486,
'r': 0.9533582089552238},
{'a': 0.705989110707804,
'f1': 0.7267206477732794,
'p': 0.5924092409240924,
'r': 0.9397905759162304},
{'a': 0.8264561046018,
'f1': 0.8742927429274293,
'p': 0.8486150907354346,
'r': 0.9015728056823947},
{'a': 0.7193747728098873,
'f1': 0.6884584342211462,
'p': 0.541243654822335,
'r': 0.9456762749445676},
{'a': 0.705330204492122,
'f1': 0.7031408308004053,
'p': 0.5636166756903086,
'r': 0.9344703770197487},
{'a': 0.7557134033353922,
'f1': 0.7478482626713421,
'p': 0.6455696202531646,
'r': 0.8886363636363637},
{'a': 0.7514698022447889,
'f1': 0.7134935304990758,
'p': 0.5830815709969789,
'r': 0.919047619047619},
{'a': 0.7793185419968305,
'f1': 0.8156239655743132,
'p': 0.7080459770114943,
'r': 0.9617486338797814},
{'a': 0.7261724659606656,
'f1': 0.7008264462809918,
'p': 0.5653333333333334,
'r': 0.9217391304347826},
{'a': 0.65639374425023,
'f1': 0.6784330606973741,
'p': 0.5260347129506008,
'r': 0.9551515151515152},
{'a': 0.5795194508009154,
'f1': 0.36253252385082396,
'p': 0.23196448390677027,
'r': 0.8293650793650794},
{'a': 0.6690442225392297,
'f1': 0.6912156166814553,
'p': 0.5402219140083218,
'r': 0.9593596059113301},
{'a': 0.7964285714285714,
'f1': 0.8511749347258485,
'p': 0.7608617594254937,
'r': 0.9658158614402917},
{'a': 0.9039400846629763,
'f1': 0.9232370543845955,
'p': 0.8910095429432446,
'r': 0.9578833693304536},
{'a': 0.7070113591852722,
'f1': 0.7375438596491228,
'p': 0.601258581235698,
'r': 0.9537205081669692},
{'a': 0.5860437610881135,
'f1': 0.3506493506493506,
'p': 0.220536756126021,
'r': 0.8552036199095022}],
'www.latimes.com;2010': [{'a': 0.57758360952094,
'f1': 0.501776830135039,
'p': 0.35124378109452736,
'r': 0.8781094527363185},
{'a': 0.5115002613695766,
'f1': 0.4247460757156048,
'p': 0.2799188640973631,
'r': 0.8801020408163265},
{'a': 0.5449659620207811,
'f1': 0.3266171792152704,
'p': 0.20602006688963212,
'r': 0.7877237851662404},
{'a': 0.5340042695943885,
'f1': 0.3950910530482977,
'p': 0.2577479338842975,
'r': 0.8457627118644068},
{'a': 0.8609392064069894,
'f1': 0.8715534633490248,
'p': 0.8307692307692308,
'r': 0.9165487977369166},
{'a': 0.857029702970297,
'f1': 0.8579299488390398,
'p': 0.8056171470805618,
'r': 0.9175084175084175},
{'a': 0.548844976342889,
'f1': 0.4693944353518822,
'p': 0.31937639198218265,
'r': 0.8851851851851852},
{'a': 0.6771910724006532,
'f1': 0.4549632352941177,
'p': 0.31873792659368966,
'r': 0.7945425361155698},
{'a': 0.8418150975402884,
'f1': 0.8306854289605085,
'p': 0.7760814249363868,
'r': 0.8935546875},
{'a': 0.8308383233532934,
'f1': 0.7794404684450228,
'p': 0.6957026713124274,
'r': 0.886094674556213},
{'a': 0.6095802214782384,
'f1': 0.1502242152466368,
'p': 0.08411801632140616,
'r': 0.7015706806282722},
{'a': 0.8383490971625107,
'f1': 0.8251162790697675,
'p': 0.7659758203799655,
'r': 0.8941532258064516},
{'a': 0.5950958817981766,
'f1': 0.5110098709187547,
'p': 0.36339092872570194,
'r': 0.860613810741688},
{'a': 0.5516628585935517,
'f1': 0.5002829654782116,
'p': 0.34913112164297,
'r': 0.8822355289421158},
{'a': 0.6509019990248659,
'f1': 0.4077750206782465,
'p': 0.28107183580387685,
'r': 0.7424698795180723},
{'a': 0.6175968109339408,
'f1': 0.29352972119936876,
'p': 0.17873158231902628,
'r': 0.8205882352941176},
{'a': 0.7477128782547502,
'f1': 0.5571340333539223,
'p': 0.4092558983666062,
'r': 0.8723404255319149},
{'a': 0.5845451953236385,
'f1': 0.5233889434085706,
'p': 0.37174721189591076,
'r': 0.8839779005524862},
{'a': 0.5083354706334958,
'f1': 0.3749592435604826,
'p': 0.23958333333333334,
'r': 0.8620689655172413},
{'a': 0.5472246439988375,
'f1': 0.4517945109078114,
'p': 0.30556877677296523,
'r': 0.8663967611336032},
{'a': 0.8484729835552075,
'f1': 0.8508670520231213,
'p': 0.8029090909090909,
'r': 0.9049180327868852},
{'a': 0.5529940119760479,
'f1': 0.4553082816490333,
'p': 0.30754066042385414,
'r': 0.8764044943820225},
{'a': 0.5807717462393721,
'f1': 0.47286184210526316,
'p': 0.3281963470319635,
'r': 0.8455882352941176},
{'a': 0.8423690205011389,
'f1': 0.8180862250262881,
'p': 0.7444976076555024,
'r': 0.9078179696616102},
{'a': 0.6485103821847727,
'f1': 0.22443559096945553,
'p': 0.13296616837136113,
'r': 0.7191489361702128}],
'www.latimes.com;2015': [{'a': 0.5784021569462182,
'f1': 0.16661991584852734,
'p': 0.09177997527812114,
'r': 0.9027355623100304},
{'a': 0.5897770055024616,
'f1': 0.16307237813884784,
'p': 0.09046214355948869,
'r': 0.8263473053892215},
{'a': 0.5769712140175219,
'f1': 0.1892324093816631,
'p': 0.10543510543510544,
'r': 0.922077922077922},
{'a': 0.27680861980502824,
'f1': 0.3418164837730563,
'p': 0.20742419948994048,
'r': 0.9708222811671088},
{'a': 0.21026446713799424,
'f1': 0.21212121212121213,
'p': 0.11944689614592527,
'r': 0.9463869463869464},
{'a': 0.581120513568719,
'f1': 0.13184154823102512,
'p': 0.07103290974258716,
'r': 0.9159663865546218},
{'a': 0.30413974635080165,
'f1': 0.3818027210884354,
'p': 0.23927524646949108,
'r': 0.9442691903259727},
{'a': 0.5865577529378045,
'f1': 0.18063050269809713,
'p': 0.1000629326620516,
'r': 0.9271137026239067},
{'a': 0.5787281310747014,
'f1': 0.19394261424017006,
'p': 0.10846953937592868,
'r': 0.9147869674185464},
{'a': 0.5894652191395255,
'f1': 0.2861803775343743,
'p': 0.16840373011519474,
'r': 0.951937984496124},
{'a': 0.5644796380090498,
'f1': 0.11646586345381524,
'p': 0.06251924853711117,
'r': 0.8493723849372385},
{'a': 0.57023030992323,
'f1': 0.13107214716872664,
'p': 0.07058823529411765,
'r': 0.9156626506024096},
{'a': 0.19839381320642474,
'f1': 0.18358073311117842,
'p': 0.10198586334567486,
'r': 0.9181818181818182},
{'a': 0.5786845484377229,
'f1': 0.16887137630171684,
'p': 0.09276437847866419,
'r': 0.9404388714733543},
{'a': 0.5842517401392111,
'f1': 0.15352819604369647,
'p': 0.08352071956312239,
'r': 0.948905109489051},
{'a': 0.5832123148417078,
'f1': 0.14328358208955222,
'p': 0.07794738551477752,
'r': 0.8856088560885609},
{'a': 0.5886075949367089,
'f1': 0.17768832662449685,
'p': 0.09831371301304487,
'r': 0.9223880597014925},
{'a': 0.5819786995515696,
'f1': 0.20219309975929392,
'p': 0.11365003006614552,
'r': 0.9152542372881356},
{'a': 0.5828043058239029,
'f1': 0.22267935201851374,
'p': 0.12657117801812334,
'r': 0.9252136752136753},
{'a': 0.20013275804845668,
'f1': 0.18526031102096008,
'p': 0.1025065469509914,
'r': 0.9614035087719298},
{'a': 0.5935805991440799,
'f1': 0.2179522371671699,
'p': 0.12291021671826625,
'r': 0.9612590799031477},
{'a': 0.5939498933901919,
'f1': 0.3076573506021359,
'p': 0.18327016783974012,
'r': 0.9575671852899575},
{'a': 0.2395295467584624,
'f1': 0.26176552492341965,
'p': 0.15161290322580645,
'r': 0.9572301425661914},
{'a': 0.19246190858059342,
'f1': 0.18329278183292783,
'p': 0.10167966406718656,
'r': 0.9287671232876712},
{'a': 0.20551449687322343,
'f1': 0.20120034295513003,
'p': 0.1126039667306462,
'r': 0.9436997319034852}],
'www.nymag.com;2000': [{'a': 0.9341021416803954,
'f1': 0.9358288770053477,
'p': 0.9133611691022965,
'r': 0.9594298245614035},
{'a': 0.9366591928251121,
'f1': 0.9363380281690141,
'p': 0.9264214046822743,
'r': 0.9464692482915718},
{'a': 0.9366489046773239,
'f1': 0.9431153641679958,
'p': 0.929769392033543,
'r': 0.9568500539374326},
{'a': 0.9255404323458767,
'f1': 0.9085545722713865,
'p': 0.8700564971751412,
'r': 0.9506172839506173},
{'a': 0.921443736730361,
'f1': 0.8168316831683169,
'p': 0.7568807339449541,
'r': 0.8870967741935484},
{'a': 0.9404466501240695,
'f1': 0.9449541284403669,
'p': 0.9395667046750285,
'r': 0.9504036908881199},
{'a': 0.9535728704077513,
'f1': 0.9664820751967356,
'p': 0.9673278879813302,
'r': 0.9656377402446127},
{'a': 0.9366706875753921,
'f1': 0.9412423055400112,
'p': 0.9375696767001115,
'r': 0.9449438202247191},
{'a': 0.9346674514420247,
'f1': 0.9405463310123192,
'p': 0.9370330843116329,
'r': 0.9440860215053763},
{'a': 0.5083661417322834,
'f1': 0.42287694974003465,
'p': 0.27938931297709924,
'r': 0.8693586698337292},
{'a': 0.9113418530351438,
'f1': 0.8893320039880358,
'p': 0.8576923076923076,
'r': 0.9233954451345756},
{'a': 0.62508038585209,
'f1': 0.5004284490145672,
'p': 0.34679334916864607,
'r': 0.8984615384615384},
{'a': 0.9434806939003917,
'f1': 0.9508515815085158,
'p': 0.9467054263565892,
'r': 0.9550342130987293},
{'a': 0.9415650406504065,
'f1': 0.9530036779730282,
'p': 0.9588815789473685,
'r': 0.9471974004874086},
{'a': 0.9335915882678473,
'f1': 0.9424184261036469,
'p': 0.9281663516068053,
'r': 0.9571150097465887},
{'a': 0.9487983281086729,
'f1': 0.9578675838349098,
'p': 0.93929173693086,
'r': 0.9771929824561404},
{'a': 0.6235541535226078,
'f1': 0.5846867749419953,
'p': 0.42495784148397975,
'r': 0.9368029739776952},
{'a': 0.9231962761830876,
'f1': 0.875784190715182,
'p': 0.8533007334963325,
'r': 0.8994845360824743},
{'a': 0.9348189415041783,
'f1': 0.9348189415041782,
'p': 0.9311875693673696,
'r': 0.9384787472035794},
{'a': 0.9131064446053584,
'f1': 0.8854961832061069,
'p': 0.8672897196261682,
'r': 0.9044834307992202},
{'a': 0.9284267386900743,
'f1': 0.9266943291839558,
'p': 0.901749663526245,
'r': 0.9530583214793741},
{'a': 0.922209026128266,
'f1': 0.9187848729076256,
'p': 0.8874251497005988,
'r': 0.9524421593830334},
{'a': 0.9273008507347255,
'f1': 0.9142335766423357,
'p': 0.8851590106007067,
'r': 0.9452830188679245},
{'a': 0.9116497263487099,
'f1': 0.8567807351077313,
'p': 0.8203883495145631,
'r': 0.896551724137931},
{'a': 0.9259259259259259,
'f1': 0.9219219219219219,
'p': 0.9205397301349325,
'r': 0.9233082706766917}],
'www.nymag.com;2005': [{'a': 0.6900668576886342,
'f1': 0.7067329417080886,
'p': 0.560573476702509,
'r': 0.9559902200488998},
{'a': 0.6932038834951456,
'f1': 0.7116788321167883,
'p': 0.5693430656934306,
'r': 0.948905109489051},
{'a': 0.6804676083779835,
'f1': 0.6920187793427229,
'p': 0.5411160058737151,
'r': 0.9596354166666666},
{'a': 0.7006767308693389,
'f1': 0.7091552857865452,
'p': 0.5630522088353414,
'r': 0.9576502732240437},
{'a': 0.6964656964656964,
'f1': 0.7041540020263425,
'p': 0.5591311343523733,
'r': 0.9507523939808481},
{'a': 0.6303317535545023,
'f1': 0.4989293361884368,
'p': 0.34776119402985073,
'r': 0.8825757575757576},
{'a': 0.6414073071718539,
'f1': 0.6015037593984962,
'p': 0.449438202247191,
'r': 0.9090909090909091},
{'a': 0.6723192019950125,
'f1': 0.7020408163265306,
'p': 0.5564342199856218,
'r': 0.9508599508599509},
{'a': 0.7182770663562281,
'f1': 0.7317073170731707,
'p': 0.6049495875343721,
'r': 0.9256661991584852},
{'a': 0.7611862643080125,
'f1': 0.7960906263882719,
'p': 0.6876438986953185,
'r': 0.9451476793248945},
{'a': 0.7246865959498554,
'f1': 0.7692929292929294,
'p': 0.6463000678886626,
'r': 0.9500998003992016},
{'a': 0.751131221719457,
'f1': 0.8060275729400448,
'p': 0.696398891966759,
'r': 0.95662100456621},
{'a': 0.642982971227246,
'f1': 0.581267217630854,
'p': 0.4323770491803279,
'r': 0.8865546218487395},
{'a': 0.6345776031434185,
'f1': 0.6574585635359116,
'p': 0.5107296137339056,
'r': 0.9224806201550387},
{'a': 0.666828793774319,
'f1': 0.7004809794490601,
'p': 0.553941908713693,
'r': 0.9524375743162902},
{'a': 0.6948228882833788,
'f1': 0.7427258805513017,
'p': 0.6077694235588973,
'r': 0.9547244094488189},
{'a': 0.7691962281095645,
'f1': 0.8172119487908962,
'p': 0.7114551083591332,
'r': 0.9598997493734336},
{'a': 0.6871046228710462,
'f1': 0.7043678160919541,
'p': 0.5570909090909091,
'r': 0.9575},
{'a': 0.692236169223617,
'f1': 0.7375099127676448,
'p': 0.5953905249679897,
'r': 0.96875},
{'a': 0.6849725411882177,
'f1': 0.698518872431916,
'p': 0.5471556886227545,
'r': 0.9656538969616909},
{'a': 0.723110151187905,
'f1': 0.7632065016623569,
'p': 0.6306471306471306,
'r': 0.9663236669784846},
{'a': 0.7125867195242814,
'f1': 0.7337006427915519,
'p': 0.5962686567164179,
'r': 0.9534606205250596},
{'a': 0.6373333333333333,
'f1': 0.6483971044467425,
'p': 0.4940898345153664,
'r': 0.9428571428571428},
{'a': 0.6335403726708074,
'f1': 0.5154004106776181,
'p': 0.3621933621933622,
'r': 0.8932384341637011},
{'a': 0.6381405176967776,
'f1': 0.6513994910941476,
'p': 0.4972804972804973,
'r': 0.943952802359882},
{'a': 0.7373225152129818,
'f1': 0.7643312101910829,
'p': 0.6593406593406593,
'r': 0.9090909090909091}],
'www.nymag.com;2010': [{'a': 0.42023103340618173,
'f1': 0.18588338448049102,
'p': 0.10495049504950495,
'r': 0.8122605363984674},
{'a': 0.41449363250454824,
'f1': 0.3159759121501949,
'p': 0.19174548581255374,
'r': 0.89738430583501},
{'a': 0.32908419993290844,
'f1': 0.06716417910447761,
'p': 0.03524229074889868,
'r': 0.7128712871287128},
{'a': 0.3365706630944407,
'f1': 0.08751727314601565,
'p': 0.04654581087702107,
'r': 0.7307692307692307},
{'a': 0.3791226384886327,
'f1': 0.21402513173895418,
'p': 0.12330686595049042,
'r': 0.8098159509202454},
{'a': 0.3555699481865285,
'f1': 0.15247018739352639,
'p': 0.08415608838740009,
'r': 0.8099547511312217},
{'a': 0.3525329632199861,
'f1': 0.09505334626576141,
'p': 0.050724637681159424,
'r': 0.7538461538461538},
{'a': 0.3980130394287488,
'f1': 0.26301786393006465,
'p': 0.1557155715571557,
'r': 0.8459657701711492},
{'a': 0.34729772945314996,
'f1': 0.14709569577935644,
'p': 0.08073394495412844,
'r': 0.8262910798122066},
{'a': 0.3388906700865662,
'f1': 0.13723849372384936,
'p': 0.07495429616087751,
'r': 0.8118811881188119},
{'a': 0.5353352901332129,
'f1': 0.5436807095343681,
'p': 0.3865069356872636,
'r': 0.9162929745889388},
{'a': 0.34894020879468524,
'f1': 0.15862632869991825,
'p': 0.08798185941043084,
'r': 0.8049792531120332},
{'a': 0.3361658735554045,
'f1': 0.08866075594960336,
'p': 0.04695996045477014,
'r': 0.7916666666666666},
{'a': 0.31815137307434693,
'f1': 0.05478180129990715,
'p': 0.028474903474903474,
'r': 0.7195121951219512},
{'a': 0.36199095022624433,
'f1': 0.16426756985605417,
'p': 0.09150943396226414,
'r': 0.8016528925619835},
{'a': 0.3780205230056273,
'f1': 0.16153502900490851,
'p': 0.09022931206380858,
'r': 0.7702127659574468},
{'a': 0.37316421895861146,
'f1': 0.1726872246696035,
'p': 0.09645669291338582,
'r': 0.8235294117647058},
{'a': 0.3839458413926499,
'f1': 0.2164821648216482,
'p': 0.12417685794920037,
'r': 0.8434504792332268},
{'a': 0.5273233553776541,
'f1': 0.4674509803921568,
'p': 0.3247956403269755,
'r': 0.8335664335664336},
{'a': 0.386691776522285,
'f1': 0.10694698354661791,
'p': 0.057863501483679525,
'r': 0.7048192771084337},
{'a': 0.4210848912044131,
'f1': 0.31133795114837765,
'p': 0.1903700401248328,
'r': 0.854},
{'a': 0.35743736124326037,
'f1': 0.17035217035217035,
'p': 0.09502055733211512,
'r': 0.8221343873517787},
{'a': 0.37483176312247646,
'f1': 0.16905187835420393,
'p': 0.09388971684053651,
'r': 0.8475336322869955},
{'a': 0.3340020060180542,
'f1': 0.09124087591240876,
'p': 0.04880429477794046,
'r': 0.6993006993006993},
{'a': 0.551829268292683,
'f1': 0.600181323662738,
'p': 0.4431057563587684,
'r': 0.9297752808988764}],
'www.nymag.com;2015': [{'a': 0.13001313485113836,
'f1': 0.0046086412022542265,
'p': 0.002311499711062536,
'r': 0.7419354838709677},
{'a': 0.13121737041001333,
'f1': 0.007684630738522954,
'p': 0.0038609070624514253,
'r': 0.7979274611398963},
{'a': 0.138274384172475,
'f1': 0.024790554112978296,
'p': 0.012588630426669984,
'r': 0.8070175438596491},
{'a': 0.17959805281335836,
'f1': 0.058428789912294395,
'p': 0.030262860098936275,
'r': 0.8432432432432433},
{'a': 0.26507114485393934,
'f1': 0.256321546680234,
'p': 0.14907530699807664,
'r': 0.9135086128739801},
{'a': 0.18815582196352762,
'f1': 0.08481820370816279,
'p': 0.04456604251012146,
'r': 0.8762437810945274},
{'a': 0.13079013103115825,
'f1': 0.004966513657912559,
'p': 0.0024915689334071576,
'r': 0.7443609022556391},
{'a': 0.1319284190360082,
'f1': 0.010668916236601825,
'p': 0.005370971771171621,
'r': 0.7846715328467153},
{'a': 0.1318195738723367,
'f1': 0.0075518879719929976,
'p': 0.0037965453951172906,
'r': 0.695852534562212},
{'a': 0.13133126121787855,
'f1': 0.006409293475539532,
'p': 0.0032178591181054855,
'r': 0.7804878048780488},
{'a': 0.13577454988257806,
'f1': 0.017939214232765014,
'p': 0.009072051583235449,
'r': 0.7943107221006565},
{'a': 0.16074426412982654,
'f1': 0.00872467695561651,
'p': 0.004388443764752817,
'r': 0.7333333333333333},
{'a': 0.13115612993363604,
'f1': 0.008519754870210753,
'p': 0.004282923408305365,
'r': 0.7916666666666666},
{'a': 0.1438546172653752,
'f1': 0.0205481294209408,
'p': 0.010405317062540859,
'r': 0.814498933901919},
{'a': 0.2977433739358724,
'f1': 0.34090044388078633,
'p': 0.2097658862876254,
'r': 0.9094248429192847},
{'a': 0.13232175262916238,
'f1': 0.009138348605527952,
'p': 0.004596950438343088,
'r': 0.756198347107438},
{'a': 0.13052990854592394,
'f1': 0.006102440976390557,
'p': 0.0030638639845299983,
'r': 0.7393939393939394},
{'a': 0.6091557395905222,
'f1': 0.7314683104156788,
'p': 0.5882053889171327,
'r': 0.9669870455495194},
{'a': 0.13270142180094788,
'f1': 0.009873588151694217,
'p': 0.004970004267175381,
'r': 0.7388059701492538},
{'a': 0.13332897118803028,
'f1': 0.012230287362036393,
'p': 0.0061629421785750075,
'r': 0.7884615384615384},
{'a': 0.1631658123467796,
'f1': 0.01443616916565504,
'p': 0.00728959575878065,
'r': 0.7357859531772575},
{'a': 0.16900046952246803,
'f1': 0.031107103754749788,
'p': 0.01584749655489205,
'r': 0.8385416666666666},
{'a': 0.13087885467362126,
'f1': 0.006436804550671124,
'p': 0.0032321106434155143,
'r': 0.7588235294117647},
{'a': 0.12957851384507535,
'f1': 0.003311175216355199,
'p': 0.001659208607773141,
'r': 0.7586206896551724},
{'a': 0.13055859802847755,
'f1': 0.005960729312762974,
'p': 0.0029921299439289936,
'r': 0.7579617834394905},
{'a': 0.16678690344062153,
'f1': 0.023542418625825123,
'p': 0.01194049543160603,
'r': 0.8302752293577982}]},
{'entertainment.msn.com;2000': [{'a': 0.9490060501296457,
'f1': 0.8605200945626478,
'p': 0.8708133971291866,
'r': 0.8504672897196262},
{'a': 0.9085894405043341,
'f1': 0.7993079584775087,
'p': 0.8339350180505415,
'r': 0.7674418604651163},
{'a': 0.9312134977287476,
'f1': 0.9058614564831261,
'p': 0.9497206703910615,
'r': 0.865874363327674},
{'a': 0.8932355338223309,
'f1': 0.7673179396092362,
'p': 0.7152317880794702,
'r': 0.8275862068965517},
{'a': 0.9014557670772676,
'f1': 0.7864077669902912,
'p': 0.8526315789473684,
'r': 0.7297297297297297},
{'a': 0.9283464566929134,
'f1': 0.855784469096672,
'p': 0.8517350157728707,
'r': 0.8598726114649682},
{'a': 0.916030534351145,
'f1': 0.8912848158131176,
'p': 0.9483747609942639,
'r': 0.8406779661016949},
{'a': 0.8743633276740238,
'f1': 0.7131782945736435,
'p': 0.6237288135593221,
'r': 0.832579185520362},
{'a': 0.8961965134706814,
'f1': 0.8120516499282641,
'p': 0.7861111111111111,
'r': 0.8397626112759644},
{'a': 0.8709677419354839,
'f1': 0.6589861751152073,
'p': 0.5674603174603174,
'r': 0.7857142857142857},
{'a': 0.9037656903765691,
'f1': 0.8804159445407279,
'p': 0.8581081081081081,
'r': 0.9039145907473309},
{'a': 0.9483362521891419,
'f1': 0.9034369885433715,
'p': 0.8990228013029316,
'r': 0.9078947368421053},
{'a': 0.7321052631578947,
'f1': 0.6581598388179987,
'p': 0.9477756286266924,
'r': 0.5041152263374485},
{'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
{'a': 0.9405477621910487,
'f1': 0.8959064327485381,
'p': 0.9341463414634147,
'r': 0.8606741573033708},
{'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
{'a': 0.9454148471615721,
'f1': 0.8898678414096917,
'p': 0.9181818181818182,
'r': 0.8632478632478633},
{'a': 0.861646234676007,
'f1': 0.6901960784313725,
'p': 0.6285714285714286,
'r': 0.7652173913043478},
{'a': 0.8773062730627307,
'f1': 0.6683291770573566,
'p': 0.5877192982456141,
'r': 0.7745664739884393},
{'a': 0.8770864946889226,
'f1': 0.7768595041322314,
'p': 0.7212276214833759,
'r': 0.8417910447761194},
{'a': 0.9256449165402124,
'f1': 0.8915929203539822,
'p': 0.9372093023255814,
'r': 0.8502109704641351},
{'a': 0.9379900213827512,
'f1': 0.9186155285313378,
'p': 0.9478764478764479,
'r': 0.8911070780399274},
{'a': 0.9196093163035312,
'f1': 0.8528198074277854,
'p': 0.9171597633136095,
'r': 0.7969151670951157},
{'a': 0.9389221556886228,
'f1': 0.9139966273187183,
'p': 0.9525483304042179,
'r': 0.8784440842787682},
{'a': 0.9531304651976216,
'f1': 0.9621896162528217,
'p': 0.96875,
'r': 0.9557174887892377}],
'entertainment.msn.com;2005': [{'a': 0.6713338246131172,
'f1': 0.6715758468335787,
'p': 0.5402843601895735,
'r': 0.8871595330739299},
{'a': 0.5208333333333334,
'f1': 0.19936708860759492,
'p': 0.11645101663585952,
'r': 0.6923076923076923},
{'a': 0.47909527073337904,
'f1': 0.23387096774193547,
'p': 0.14129110840438489,
'r': 0.6783625730994152},
{'a': 0.6582446808510638,
'f1': 0.578688524590164,
'p': 0.42530120481927713,
'r': 0.9051282051282051},
{'a': 0.6818505338078292,
'f1': 0.6885017421602788,
'p': 0.5613636363636364,
'r': 0.8900900900900901},
{'a': 0.644927536231884,
'f1': 0.5824941905499613,
'p': 0.44761904761904764,
'r': 0.8337028824833703},
{'a': 0.6297935103244838,
'f1': 0.5931928687196111,
'p': 0.44471445929526127,
'r': 0.8905109489051095},
{'a': 0.6943111720356409,
'f1': 0.7126288659793815,
'p': 0.5864262990455992,
'r': 0.9080459770114943},
{'a': 0.6831955922865014,
'f1': 0.6054888507718696,
'p': 0.4644736842105263,
'r': 0.8694581280788177},
{'a': 0.7369093231162197,
'f1': 0.8037160552644116,
'p': 0.7897940074906367,
'r': 0.8181377303588748},
{'a': 0.6730169726359543,
'f1': 0.7581967213114754,
'p': 0.7115384615384616,
'r': 0.8114035087719298},
{'a': 0.6763540290620872,
'f1': 0.6606648199445984,
'p': 0.5151187904967602,
'r': 0.9208494208494209},
{'a': 0.6474719101123596,
'f1': 0.6404011461318052,
'p': 0.4944690265486726,
'r': 0.9085365853658537},
{'a': 0.7642312864186463,
'f1': 0.8040238450074515,
'p': 0.7052287581699347,
'r': 0.9350086655112652},
{'a': 0.5548780487804879,
'f1': 0.41948310139165007,
'p': 0.2776315789473684,
'r': 0.8577235772357723},
{'a': 0.6096866096866097,
'f1': 0.358814352574103,
'p': 0.23279352226720648,
'r': 0.782312925170068},
{'a': 0.6689549961861174,
'f1': 0.6646058732612056,
'p': 0.524390243902439,
'r': 0.9071729957805907},
{'a': 0.5023659305993691,
'f1': 0.27052023121387286,
'p': 0.16295264623955433,
'r': 0.7959183673469388},
{'a': 0.6551440329218107,
'f1': 0.6028436018957346,
'p': 0.45170454545454547,
'r': 0.905982905982906},
{'a': 0.6687163720215219,
'f1': 0.6475878986099755,
'p': 0.5057471264367817,
'r': 0.9},
{'a': 0.5804038893044129,
'f1': 0.3249097472924188,
'p': 0.2102803738317757,
'r': 0.7142857142857143},
{'a': 0.5838192419825073,
'f1': 0.4888093106535363,
'p': 0.3674293405114401,
'r': 0.7299465240641712},
{'a': 0.6605504587155964,
'f1': 0.5668896321070234,
'p': 0.41903584672435107,
'r': 0.875968992248062},
{'a': 0.7252042007001167,
'f1': 0.7307032590051457,
'p': 0.6016949152542372,
'r': 0.9301310043668122},
{'a': 0.6313432835820896,
'f1': 0.606060606060606,
'p': 0.4540023894862604,
'r': 0.9112709832134293},
{'a': 0.5174825174825175,
'f1': 0.23503325942350328,
'p': 0.15657311669128507,
'r': 0.4711111111111111},
{'a': 0.6824005394470668,
'f1': 0.6713189113747383,
'p': 0.5320796460176991,
'r': 0.9092627599243857}],
'entertainment.msn.com;2010': [{'a': 0.34710193204530315,
'f1': 0.16595744680851063,
'p': 0.09219858156028368,
'r': 0.8297872340425532},
{'a': 0.47259599606170005,
'f1': 0.4225655767157743,
'p': 0.279467680608365,
'r': 0.865979381443299},
{'a': 0.602038006058937,
'f1': 0.5184938353882039,
'p': 0.3778533268576979,
'r': 0.8259023354564756},
{'a': 0.9065810593900482,
'f1': 0.8565795958600296,
'p': 0.8244781783681214,
'r': 0.8912820512820513},
{'a': 0.37843053173241853,
'f1': 0.29447554149428085,
'p': 0.18327779460769464,
'r': 0.7487623762376238},
{'a': 0.5584045584045584,
'f1': 0.3075941289087428,
'p': 0.1917263325377884,
'r': 0.7774193548387097},
{'a': 0.9308794605226187,
'f1': 0.5867861142217244,
'p': 0.5255767301905717,
'r': 0.6641318124207858},
{'a': 0.6821100917431193,
'f1': 0.5756276791181875,
'p': 0.45542635658914726,
'r': 0.7820299500831946},
{'a': 0.8918558077436582,
'f1': 0.799752781211372,
'p': 0.7629716981132075,
'r': 0.8402597402597403},
{'a': 0.9138234259584945,
'f1': 0.8416289592760181,
'p': 0.8335467349551856,
'r': 0.8498694516971279},
{'a': 0.6992850759606791,
'f1': 0.614768174012593,
'p': 0.5104562737642585,
'r': 0.7726618705035971},
{'a': 0.39193446754883426,
'f1': 0.25193798449612403,
'p': 0.1480637813211845,
'r': 0.8441558441558441},
{'a': 0.9255328110521439,
'f1': 0.7086354647330256,
'p': 0.6966947504860661,
'r': 0.7209926224010731},
{'a': 0.4112280701754386,
'f1': 0.3043117744610282,
'p': 0.18927282104177412,
'r': 0.7758985200845666},
{'a': 0.6998666073810582,
'f1': 0.6113989637305699,
'p': 0.4814143245693563,
'r': 0.8375394321766562},
{'a': 0.45671459161823186,
'f1': 0.41960784313725485,
'p': 0.2754182754182754,
'r': 0.8806584362139918},
{'a': 0.81045197740113,
'f1': 0.8375695957395303,
'p': 0.76650420912716,
'r': 0.9231590181430096},
{'a': 0.9292250500428939,
'f1': 0.6247156937073541,
'p': 0.6181545386346586,
'r': 0.6314176245210728},
{'a': 0.6466584158415841,
'f1': 0.4535885167464115,
'p': 0.3118421052631579,
'r': 0.8315789473684211},
{'a': 0.7744025921425678,
'f1': 0.7153806847215125,
'p': 0.6289308176100629,
'r': 0.8293838862559242},
{'a': 0.7549271636675235,
'f1': 0.7873078829945463,
'p': 0.7020335985853228,
'r': 0.8961625282167043},
{'a': 0.4852849092047589,
'f1': 0.46692607003891046,
'p': 0.3167619885613726,
'r': 0.8877928483353884},
{'a': 0.7680274704311332,
'f1': 0.7445378151260503,
'p': 0.6577579806978471,
'r': 0.8576960309777347},
{'a': 0.39532642736689955,
'f1': 0.3062465450525152,
'p': 0.18998628257887518,
'r': 0.7891737891737892},
{'a': 0.697092084006462,
'f1': 0.6418338108882521,
'p': 0.518118735543562,
'r': 0.8431618569636136}],
'entertainment.msn.com;2015': [{'a': 0.611458926199962,
'f1': 0.3059979667909184,
'p': 0.1896262074758505,
'r': 0.7921052631578948},
{'a': 0.5757856774858321,
'f1': 0.08855435023245517,
'p': 0.047014574518100614,
'r': 0.7604562737642585},
{'a': 0.606508875739645,
'f1': 0.2200938232994527,
'p': 0.1287151348879744,
'r': 0.7587601078167115},
{'a': 0.576417419884963,
'f1': 0.08396268325188805,
'p': 0.044607033278262924,
'r': 0.7132075471698113},
{'a': 0.5764862375933695,
'f1': 0.08570797437596643,
'p': 0.045722366250294605,
'r': 0.6830985915492958},
{'a': 0.5677327357914036,
'f1': 0.08809626128061883,
'p': 0.046835732236691796,
'r': 0.740072202166065},
{'a': 0.5240807437808862,
'f1': 0.17890173410404622,
'p': 0.10165872885531287,
'r': 0.7448856799037304},
{'a': 0.5725773195876289,
'f1': 0.06705670567056705,
'p': 0.03525792711784193,
'r': 0.6834862385321101},
{'a': 0.5741364379871067,
'f1': 0.11797528895974492,
'p': 0.06390328151986183,
'r': 0.7668393782383419},
{'a': 0.5746238308255388,
'f1': 0.07228381374722838,
'p': 0.03827189481098849,
'r': 0.649402390438247},
{'a': 0.5089755213055304,
'f1': 0.0882154882154882,
'p': 0.04668567355666429,
'r': 0.7987804878048781},
{'a': 0.5848923045808474,
'f1': 0.12305063020722067,
'p': 0.0675739089629282,
'r': 0.6873508353221957},
{'a': 0.5846794173025468,
'f1': 0.1722299032194351,
'p': 0.09637488947833775,
'r': 0.8089053803339518},
{'a': 0.5572192513368984,
'f1': 0.11297234125438256,
'p': 0.06110408765276022,
'r': 0.7474226804123711},
{'a': 0.5301295288154684,
'f1': 0.049734244495064535,
'p': 0.02577218178241196,
'r': 0.7081081081081081},
{'a': 0.551,
'f1': 0.19547157517510996,
'p': 0.11156563778356267,
'r': 0.7884362680683311},
{'a': 0.5754647621048209,
'f1': 0.05206378986866791,
'p': 0.026981040350024306,
'r': 0.74},
{'a': 0.5662180612443924,
'f1': 0.15980355119002645,
'p': 0.0879783693843594,
'r': 0.8703703703703703},
{'a': 0.5488980059154661,
'f1': 0.09216589861751152,
'p': 0.049352251696483655,
'r': 0.6956521739130435},
{'a': 0.534936598163533,
'f1': 0.17575945443273405,
'p': 0.09898743016759777,
'r': 0.7831491712707183},
{'a': 0.6135714285714285,
'f1': 0.3474065138721351,
'p': 0.2237327636434259,
'r': 0.7768037761294673},
{'a': 0.5759047619047619,
'f1': 0.1794730053436521,
'p': 0.1007864238410596,
'r': 0.8184873949579832},
{'a': 0.567614566284779,
'f1': 0.047329276538201494,
'p': 0.024758311718934212,
'r': 0.5357142857142857},
{'a': 0.5681701428131101,
'f1': 0.059521145670172304,
'p': 0.03107476635514019,
'r': 0.7037037037037037},
{'a': 0.5815987073472103,
'f1': 0.19436310395314788,
'p': 0.1105098855359001,
'r': 0.8057663125948407}],
'news.bbc.co.uk;2000': [{'a': 0.8686186186186187,
'f1': 0.771838331160365,
'p': 0.8222222222222222,
'r': 0.7272727272727273},
{'a': 0.8917631041524847,
'f1': 0.8049079754601227,
'p': 0.923943661971831,
'r': 0.7130434782608696},
{'a': 0.8820286659316428,
'f1': 0.8687116564417179,
'p': 0.963265306122449,
'r': 0.7910614525139665},
{'a': 0.9178263750828363,
'f1': 0.8993506493506495,
'p': 0.9535283993115319,
'r': 0.8509984639016898},
{'a': 0.8798816568047337,
'f1': 0.8581411600279524,
'p': 0.9460708782742681,
'r': 0.7851662404092071},
{'a': 0.850887573964497,
'f1': 0.7709090909090909,
'p': 0.9401330376940134,
'r': 0.6533127889060092},
{'a': 0.9075757575757576,
'f1': 0.8443877551020408,
'p': 0.9245810055865922,
'r': 0.7769953051643192},
{'a': 0.8920510304219823,
'f1': 0.8312883435582821,
'p': 0.8713826366559485,
'r': 0.7947214076246334},
{'a': 0.890691114245416,
'f1': 0.7896879240162822,
'p': 0.8981481481481481,
'r': 0.7046004842615012},
{'a': 0.8970588235294118,
'f1': 0.8642659279778392,
'p': 0.9454545454545454,
'r': 0.7959183673469388},
{'a': 0.9136690647482014,
'f1': 0.8481012658227849,
'p': 0.925414364640884,
'r': 0.7827102803738317},
{'a': 0.8580908626850434,
'f1': 0.8220230473751601,
'p': 0.9224137931034483,
'r': 0.7413394919168591},
{'a': 0.8842268842268842,
'f1': 0.7545304777594728,
'p': 0.89453125,
'r': 0.6524216524216524},
{'a': 0.8755681818181819,
'f1': 0.8342165026495081,
'p': 0.9198664440734557,
'r': 0.7631578947368421},
{'a': 0.8826458036984353,
'f1': 0.8212351029252437,
'p': 0.8633257403189066,
'r': 0.7830578512396694},
{'a': 0.8836477987421384,
'f1': 0.8307410795974383,
'p': 0.9438669438669439,
'r': 0.7418300653594772},
{'a': 0.8985322271857051,
'f1': 0.8472622478386167,
'p': 0.9423076923076923,
'r': 0.7696335078534031},
{'a': 0.8781163434903048,
'f1': 0.8503401360544218,
'p': 0.9585889570552147,
'r': 0.7640586797066015},
{'a': 0.88,
'f1': 0.8616504854368933,
'p': 0.9633649932157394,
'r': 0.7793633369923162},
{'a': 0.8943843379701185,
'f1': 0.8661005878510778,
'p': 0.9608695652173913,
'r': 0.7883472057074911},
{'a': 0.8612786489746683,
'f1': 0.8067226890756302,
'p': 0.9467455621301775,
'r': 0.7027818448023426},
{'a': 0.9059434506635892,
'f1': 0.8809349890430972,
'p': 0.9571428571428572,
'r': 0.8159675236806495},
{'a': 0.9203539823008849,
'f1': 0.9018691588785046,
'p': 0.9554455445544554,
'r': 0.8539823008849557},
{'a': 0.9004149377593361,
'f1': 0.8441558441558442,
'p': 0.8883826879271071,
'r': 0.8041237113402062},
{'a': 0.8964646464646465,
'f1': 0.8432122370936903,
'p': 0.9423076923076923,
'r': 0.7629757785467128}],
'news.bbc.co.uk;2005': [{'a': 0.8558139534883721,
'f1': 0.735042735042735,
'p': 0.6635802469135802,
'r': 0.8237547892720306},
{'a': 0.9051262433052792,
'f1': 0.8233618233618233,
'p': 0.8947368421052632,
'r': 0.762532981530343},
{'a': 0.8707976268951879,
'f1': 0.8310344827586207,
'p': 0.9341085271317829,
'r': 0.7484472049689441},
{'a': 0.9127465857359636,
'f1': 0.879074658254469,
'p': 0.8893617021276595,
'r': 0.8690228690228691},
{'a': 0.854278728606357,
'f1': 0.8419936373276776,
'p': 0.928654970760234,
'r': 0.7701260911736179},
{'a': 0.89375,
'f1': 0.8783977110157367,
'p': 0.8989751098096632,
'r': 0.8587412587412587},
{'a': 0.882051282051282,
'f1': 0.8937421251574968,
'p': 0.9374449339207048,
'r': 0.8539325842696629},
{'a': 0.9084652443220922,
'f1': 0.8627450980392156,
'p': 0.9247787610619469,
'r': 0.8085106382978723},
{'a': 0.9001597444089456,
'f1': 0.7999999999999999,
'p': 0.8802816901408451,
'r': 0.7331378299120235},
{'a': 0.8485499462943072,
'f1': 0.8237500000000001,
'p': 0.8941655359565808,
'r': 0.7636152954808807},
{'a': 0.8938237335183901,
'f1': 0.8521739130434782,
'p': 0.9264705882352942,
'r': 0.7889087656529516},
{'a': 0.9042954031650339,
'f1': 0.8304405874499332,
'p': 0.9014492753623189,
'r': 0.7698019801980198},
{'a': 0.8670284938941656,
'f1': 0.8055555555555555,
'p': 0.9164785553047404,
'r': 0.7185840707964601},
{'a': 0.8705255140898706,
'f1': 0.7956730769230769,
'p': 0.8132678132678133,
'r': 0.7788235294117647},
{'a': 0.8936742934051144,
'f1': 0.7953367875647669,
'p': 0.9191616766467066,
'r': 0.7009132420091324},
{'a': 0.8808208366219415,
'f1': 0.7673343605546995,
'p': 0.8767605633802817,
'r': 0.6821917808219178},
{'a': 0.8889763779527559,
'f1': 0.7736757624398073,
'p': 0.8763636363636363,
'r': 0.6925287356321839},
{'a': 0.8964530892448512,
'f1': 0.8794137241838774,
'p': 0.9510086455331412,
'r': 0.8178438661710037},
{'a': 0.7067624177139438,
'f1': 0.513888888888889,
'p': 0.4085173501577287,
'r': 0.6925133689839572},
{'a': 0.8678097345132744,
'f1': 0.851828890266584,
'p': 0.9528432732316228,
'r': 0.7701793721973094},
{'a': 0.7992054893463344,
'f1': 0.8062717770034844,
'p': 0.9739057239057239,
'r': 0.687871581450654},
{'a': 0.910483870967742,
'f1': 0.7844660194174755,
'p': 0.8820960698689956,
'r': 0.7062937062937062},
{'a': 0.8644240570846076,
'f1': 0.8383961117861483,
'p': 0.9623430962343096,
'r': 0.7427341227125942},
{'a': 0.7578499201703034,
'f1': 0.665686994856723,
'p': 0.592156862745098,
'r': 0.7600671140939598},
{'a': 0.8881916714204221,
'f1': 0.8478260869565218,
'p': 0.9430051813471503,
'r': 0.770098730606488}],
'news.bbc.co.uk;2010': [{'a': 0.7731256085686465,
'f1': 0.713876381498158,
'p': 0.6502609992542878,
'r': 0.7912885662431942},
{'a': 0.7326007326007326,
'f1': 0.5415212840195395,
'p': 0.4249726177437021,
'r': 0.7461538461538462},
{'a': 0.6980255516840883,
'f1': 0.42647058823529416,
'p': 0.31624863685932386,
'r': 0.654627539503386},
{'a': 0.72301024428684,
'f1': 0.5347452018530774,
'p': 0.413510747185261,
'r': 0.7565543071161048},
{'a': 0.7402536402066698,
'f1': 0.3026481715006305,
'p': 0.1951219512195122,
'r': 0.6741573033707865},
{'a': 0.7278887303851641,
'f1': 0.5886792452830188,
'p': 0.4776902887139108,
'r': 0.7668539325842697},
{'a': 0.6578171091445427,
'f1': 0.15942028985507245,
'p': 0.09166666666666666,
'r': 0.6111111111111112},
{'a': 0.7362683438155136,
'f1': 0.43688451208594453,
'p': 0.33062330623306235,
'r': 0.6437994722955145},
{'a': 0.7378599054576708,
'f1': 0.43202979515828677,
'p': 0.31309041835357626,
'r': 0.6966966966966966},
{'a': 0.7670623145400594,
'f1': 0.7169130905156872,
'p': 0.6859903381642513,
'r': 0.7507552870090635},
{'a': 0.7341941228851291,
'f1': 0.43304843304843305,
'p': 0.31361760660247595,
'r': 0.6993865030674846},
{'a': 0.7330556607345703,
'f1': 0.5987478656801367,
'p': 0.5038314176245211,
'r': 0.7377279102384292},
{'a': 0.7322999581064097,
'f1': 0.4589331075359865,
'p': 0.3417402269861286,
'r': 0.6984536082474226},
{'a': 0.7293459552495697,
'f1': 0.4572907679033649,
'p': 0.33672172808132145,
'r': 0.7123655913978495},
{'a': 0.662057044079516,
'f1': 0.21956087824351295,
'p': 0.13157894736842105,
'r': 0.6626506024096386},
{'a': 0.7407770159973882,
'f1': 0.6361136571952337,
'p': 0.5721352019785655,
'r': 0.7162022703818369},
{'a': 0.7302886686772943,
'f1': 0.49920000000000003,
'p': 0.3804878048780488,
'r': 0.7255813953488373},
{'a': 0.7404606393949811,
'f1': 0.564339296018465,
'p': 0.46350710900473935,
'r': 0.7212389380530974},
{'a': 0.7317505315379164,
'f1': 0.5815367606412383,
'p': 0.48613678373382624,
'r': 0.7235213204951857},
{'a': 0.7516756032171582,
'f1': 0.688,
'p': 0.603397341211226,
'r': 0.8001958863858962},
{'a': 0.7564102564102564,
'f1': 0.7170513775130305,
'p': 0.6394422310756972,
'r': 0.8161016949152542},
{'a': 0.730672268907563,
'f1': 0.4758789860997547,
'p': 0.3548780487804878,
'r': 0.7220843672456576},
{'a': 0.7360406091370558,
'f1': 0.6435100548446069,
'p': 0.5794238683127572,
'r': 0.723535457348407},
{'a': 0.7270351008215086,
'f1': 0.5364616360177552,
'p': 0.43075356415478616,
'r': 0.7109243697478992},
{'a': 0.7407253022092538,
'f1': 0.47906197654941374,
'p': 0.3579474342928661,
'r': 0.7240506329113924}],
'news.bbc.co.uk;2015': [{'a': 0.5566439522998297,
'f1': 0.23474638568978193,
'p': 0.1398948598130841,
'r': 0.7290715372907154},
{'a': 0.5631616090331687,
'f1': 0.20498330336501414,
'p': 0.1203983101991551,
'r': 0.689119170984456},
{'a': 0.599853067221746,
'f1': 0.3887018331462776,
'p': 0.26430933604680745,
'r': 0.7342756183745583},
{'a': 0.5449867685974713,
'f1': 0.13908205841446453,
'p': 0.07739938080495357,
'r': 0.684931506849315},
{'a': 0.5808367427741851,
'f1': 0.36783439490445863,
'p': 0.23832860459117874,
'r': 0.8055797733217088},
{'a': 0.558343057176196,
'f1': 0.18994114499732478,
'p': 0.10973724884080371,
'r': 0.705765407554672},
{'a': 0.565053161723559,
'f1': 0.20056569812291078,
'p': 0.11854103343465046,
'r': 0.6510851419031719},
{'a': 0.570228904239469,
'f1': 0.25358739120207013,
'p': 0.15537618910348805,
'r': 0.6892583120204604},
{'a': 0.5453309156844969,
'f1': 0.11107828655834563,
'p': 0.060430729668916744,
'r': 0.6861313868613139},
{'a': 0.5545289306270828,
'f1': 0.17457198989615494,
'p': 0.09907613889773813,
'r': 0.7334905660377359},
{'a': 0.4714000920104279,
'f1': 0.05948158253751706,
'p': 0.03120526767821357,
'r': 0.6337209302325582},
{'a': 0.5347721822541966,
'f1': 0.07398568019093078,
'p': 0.039440203562340966,
'r': 0.5961538461538461},
{'a': 0.574412189675555,
'f1': 0.2965696917064698,
'p': 0.18600217864923746,
'r': 0.7312633832976445},
{'a': 0.5579530543811051,
'f1': 0.15964523281596452,
'p': 0.09011264080100125,
'r': 0.6990291262135923},
{'a': 0.5575912512427212,
'f1': 0.19363189231167485,
'p': 0.1120767156128259,
'r': 0.7110266159695817},
{'a': 0.5579038795599305,
'f1': 0.1526082130965594,
'p': 0.08620689655172414,
'r': 0.6642512077294686},
{'a': 0.5776113191843529,
'f1': 0.26181818181818184,
'p': 0.1607621315867818,
'r': 0.7049608355091384},
{'a': 0.5755836702752054,
'f1': 0.3152356902356902,
'p': 0.2,
'r': 0.7437934458788481},
{'a': 0.5660351455244371,
'f1': 0.2961478512580717,
'p': 0.18329658213891953,
'r': 0.7705677867902665},
{'a': 0.560966402416006,
'f1': 0.24333116460637608,
'p': 0.14853057982525814,
'r': 0.6726618705035972},
{'a': 0.5551107934238741,
'f1': 0.23349753694581282,
'p': 0.13771063335270192,
'r': 0.7669902912621359},
{'a': 0.5621038435603506,
'f1': 0.2256141187693775,
'p': 0.13568559954102122,
'r': 0.669024045261669},
{'a': 0.5734718661449197,
'f1': 0.32298136645962733,
'p': 0.20417005144868672,
'r': 0.7725409836065574},
{'a': 0.5657637531415806,
'f1': 0.2444120505344995,
'p': 0.14742086752637748,
'r': 0.7144886363636364},
{'a': 0.5474794841735052,
'f1': 0.15965166908563136,
'p': 0.09098428453267163,
'r': 0.650887573964497}],
'news.yahoo.com;2000': [{'a': 0.8671713695801789,
'f1': 0.8763613068545804,
'p': 0.8964613368283093,
'r': 0.8571428571428571},
{'a': 0.9057164068299925,
'f1': 0.8886941279579317,
'p': 0.9371534195933456,
'r': 0.845},
{'a': 0.8834586466165414,
'f1': 0.8683651804670913,
'p': 0.8485477178423236,
'r': 0.8891304347826087},
{'a': 0.8652037617554859,
'f1': 0.8268456375838926,
'p': 0.7642679900744417,
'r': 0.9005847953216374},
{'a': 0.879277566539924,
'f1': 0.8577827547592385,
'p': 0.8272138228941684,
'r': 0.8906976744186047},
{'a': 0.8709981167608286,
'f1': 0.8502732240437159,
'p': 0.8329764453961456,
'r': 0.8683035714285714},
{'a': 0.8131625441696113,
'f1': 0.8345717637856863,
'p': 0.9595323741007195,
'r': 0.7384083044982699},
{'a': 0.8935717326521925,
'f1': 0.8926116838487972,
'p': 0.9549632352941176,
'r': 0.8379032258064516},
{'a': 0.892225201072386,
'f1': 0.87569573283859,
'p': 0.9352708058124174,
'r': 0.8232558139534883},
{'a': 0.9017808958445763,
'f1': 0.8748280605226959,
'p': 0.9592760180995475,
'r': 0.8040455120101138},
{'a': 0.8637184115523465,
'f1': 0.8309070548712206,
'p': 0.8244444444444444,
'r': 0.837471783295711},
{'a': 0.9010513296227582,
'f1': 0.9162303664921466,
'p': 0.9162303664921466,
'r': 0.9162303664921466},
{'a': 0.8695238095238095,
'f1': 0.8209150326797385,
'p': 0.7830423940149626,
'r': 0.8626373626373627},
{'a': 0.8734290843806104,
'f1': 0.8605341246290802,
'p': 0.8146067415730337,
'r': 0.9119496855345912},
{'a': 0.8950094161958568,
'f1': 0.9183449285975833,
'p': 0.9166666666666666,
'r': 0.9200293470286134},
{'a': 0.8652365236523653,
'f1': 0.851963746223565,
'p': 0.8660933660933661,
'r': 0.8382877526753865},
{'a': 0.8655804480651731,
'f1': 0.8047337278106508,
'p': 0.7749287749287749,
'r': 0.8369230769230769},
{'a': 0.8494117647058823,
'f1': 0.7538461538461538,
'p': 0.6666666666666666,
'r': 0.8672566371681416},
{'a': 0.8923076923076924,
'f1': 0.8694638694638694,
'p': 0.8496583143507973,
'r': 0.8902147971360382},
{'a': 0.8704693998811646,
'f1': 0.8050089445438283,
'p': 0.8704061895551257,
'r': 0.7487520798668885},
{'a': 0.8586296617519514,
'f1': 0.6369710467706012,
'p': 0.5983263598326359,
'r': 0.680952380952381},
{'a': 0.8660640920295809,
'f1': 0.8446139180171592,
'p': 0.8358490566037736,
'r': 0.8535645472061657},
{'a': 0.8545454545454545,
'f1': 0.7875,
'p': 0.72,
'r': 0.8689655172413793},
{'a': 0.8989441930618401,
'f1': 0.9058988764044943,
'p': 0.8799454297407913,
'r': 0.9334298118668596},
{'a': 0.9013840830449827,
'f1': 0.9107981220657276,
'p': 0.8967642526964561,
'r': 0.9252782193958664}],
'news.yahoo.com;2005': [{'a': 0.8183511877037727,
'f1': 0.7947368421052632,
'p': 0.7322987390882638,
'r': 0.8688147295742232},
{'a': 0.7659019812304484,
'f1': 0.6968264686022958,
'p': 0.6201923076923077,
'r': 0.7950693374422187},
{'a': 0.7780847145488029,
'f1': 0.76232741617357,
'p': 0.7497575169738119,
'r': 0.7753259779338014},
{'a': 0.7697095435684648,
'f1': 0.6738491674828598,
'p': 0.5704809286898839,
'r': 0.8229665071770335},
{'a': 0.7303727200634417,
'f1': 0.46708463949843265,
'p': 0.3669950738916256,
'r': 0.6422413793103449},
{'a': 0.7885554780181437,
'f1': 0.7191844300278035,
'p': 0.6413223140495867,
'r': 0.8185654008438819},
{'a': 0.7342874922215308,
'f1': 0.6049953746530989,
'p': 0.47186147186147187,
'r': 0.8427835051546392},
{'a': 0.7974530018192845,
'f1': 0.7230514096185738,
'p': 0.6383601756954612,
'r': 0.8336520076481836},
{'a': 0.907563025210084,
'f1': 0.8614609571788414,
'p': 0.8754266211604096,
'r': 0.8479338842975207},
{'a': 0.8127090301003345,
'f1': 0.8200803212851404,
'p': 0.7705660377358491,
'r': 0.8763948497854077},
{'a': 0.7315068493150685,
'f1': 0.5473441108545035,
'p': 0.4143356643356643,
'r': 0.8061224489795918},
{'a': 0.7706803720019579,
'f1': 0.7549045252419565,
'p': 0.7025316455696202,
'r': 0.8157150932730356},
{'a': 0.9134049186006234,
'f1': 0.9062265566391597,
'p': 0.94375,
'r': 0.8715728715728716},
{'a': 0.767531556802244,
'f1': 0.7984189723320159,
'p': 0.8257861635220126,
'r': 0.7728075338434374},
{'a': 0.8539823008849557,
'f1': 0.8855084067253803,
'p': 0.853834276891405,
'r': 0.9196230598669624},
{'a': 0.7414279951593384,
'f1': 0.7223906453009961,
'p': 0.748653500897666,
'r': 0.697907949790795},
{'a': 0.7356130108423686,
'f1': 0.4267631103074141,
'p': 0.30809399477806787,
'r': 0.6941176470588235},
{'a': 0.7660013764624914,
'f1': 0.6565656565656565,
'p': 0.5527210884353742,
'r': 0.8084577114427861},
{'a': 0.8418141592920354,
'f1': 0.845572354211663,
'p': 0.7806580259222333,
'r': 0.9222614840989399},
{'a': 0.8308996702779087,
'f1': 0.8449244060475163,
'p': 0.7912621359223301,
'r': 0.9063948100092678},
{'a': 0.7292954264524104,
'f1': 0.5539714867617107,
'p': 0.4243369734789392,
'r': 0.7976539589442815},
{'a': 0.8207322872087494,
'f1': 0.7851851851851852,
'p': 0.7298728813559322,
'r': 0.8495684340320592},
{'a': 0.8422509225092251,
'f1': 0.8477292965271593,
'p': 0.8129803586678053,
'r': 0.8855813953488372},
{'a': 0.724656638325703,
'f1': 0.536853685368537,
'p': 0.4073455759599332,
'r': 0.7870967741935484},
{'a': 0.8354430379746836,
'f1': 0.839647577092511,
'p': 0.7785947712418301,
'r': 0.9110898661567878}],
'news.yahoo.com;2010': [{'a': 0.8085855031667839,
'f1': 0.7397129186602871,
'p': 0.6945193171608266,
'r': 0.7911975435005117},
{'a': 0.787512100677638,
'f1': 0.4691656590084643,
'p': 0.36397748592870544,
'r': 0.6598639455782312},
{'a': 0.7816818002368733,
'f1': 0.6349834983498349,
'p': 0.5652173913043478,
'r': 0.7243975903614458},
{'a': 0.7841779074006562,
'f1': 0.6938986556359876,
'p': 0.6402671755725191,
'r': 0.7573363431151241},
{'a': 0.8142112125162972,
'f1': 0.7762951334379906,
'p': 0.7430503380916604,
'r': 0.8126540673788003},
{'a': 0.7743382682817407,
'f1': 0.5719148936170212,
'p': 0.45776566757493187,
'r': 0.7619047619047619},
{'a': 0.810077519379845,
'f1': 0.744186046511628,
'p': 0.6931918656056587,
'r': 0.8032786885245902},
{'a': 0.7936940443752433,
'f1': 0.6476063829787235,
'p': 0.5636574074074074,
'r': 0.7609375},
{'a': 0.7953358858336234,
'f1': 0.7461139896373057,
'p': 0.6934189406099518,
'r': 0.8074766355140187},
{'a': 0.7726879861711322,
'f1': 0.49423076923076925,
'p': 0.41252006420545745,
'r': 0.6163069544364509},
{'a': 0.7744186046511627,
'f1': 0.40197287299630086,
'p': 0.2921146953405018,
'r': 0.6442687747035574},
{'a': 0.77685546875,
'f1': 0.4578884934756821,
'p': 0.3374125874125874,
'r': 0.7121771217712177},
{'a': 0.8264140429088883,
'f1': 0.8249508288845182,
'p': 0.809707666850524,
'r': 0.8407789232531501},
{'a': 0.7669854051333669,
'f1': 0.3299565846599132,
'p': 0.22664015904572565,
'r': 0.6063829787234043},
{'a': 0.8100558659217877,
'f1': 0.753202391118702,
'p': 0.7241379310344828,
'r': 0.7846975088967971},
{'a': 0.7962010288880095,
'f1': 0.6834665027658268,
'p': 0.6157253599114064,
'r': 0.7679558011049724},
{'a': 0.7912660256410257,
'f1': 0.6519706078824316,
'p': 0.5816448152562574,
'r': 0.7416413373860182},
{'a': 0.7722975750102754,
'f1': 0.6031518624641834,
'p': 0.51278928136419,
'r': 0.7321739130434782},
{'a': 0.7965571205007824,
'f1': 0.6662387676508343,
'p': 0.6006944444444444,
'r': 0.7478386167146974},
{'a': 0.7967576117042309,
'f1': 0.6775407779171895,
'p': 0.6129398410896708,
'r': 0.7573632538569425},
{'a': 0.793205317577548,
'f1': 0.6747967479674798,
'p': 0.6064718162839249,
'r': 0.7604712041884817},
{'a': 0.800672268907563,
'f1': 0.7484089944845141,
'p': 0.7170731707317073,
'r': 0.782608695652174},
{'a': 0.8014388489208633,
'f1': 0.7280788177339902,
'p': 0.6817343173431735,
'r': 0.781183932346723},
{'a': 0.8021865391185514,
'f1': 0.7454945054945056,
'p': 0.6877534468775345,
'r': 0.8138195777351248},
{'a': 0.7823008849557522,
'f1': 0.5,
'p': 0.3867924528301887,
'r': 0.7068965517241379}],
'news.yahoo.com;2015': [{'a': 0.30295741540479965,
'f1': 0.034819468624630985,
'p': 0.017882133416264965,
'r': 0.6590257879656161},
{'a': 0.3424420154625433,
'f1': 0.02490610792646768,
'p': 0.012689844096201103,
'r': 0.6673728813559322},
{'a': 0.2618452286754233,
'f1': 0.018929833417465926,
'p': 0.009604302727621975,
'r': 0.6521739130434783},
{'a': 0.3434245519846892,
'f1': 0.04766024947725143,
'p': 0.024674306618388143,
'r': 0.6965226554267651},
{'a': 0.3823694728953285,
'f1': 0.02619270346117867,
'p': 0.013344035316544597,
'r': 0.7055702917771883},
{'a': 0.3314465245501519,
'f1': 0.0648652574998184,
'p': 0.03397116445391258,
'r': 0.7161186848436247},
{'a': 0.34803188839063276,
'f1': 0.05263538951636259,
'p': 0.027343162328870168,
'r': 0.7017374517374517},
{'a': 0.35030767626199416,
'f1': 0.05606697730802743,
'p': 0.029115517784073026,
'r': 0.7543323139653415},
{'a': 0.2687149401998881,
'f1': 0.0343801784894284,
'p': 0.0176214158531344,
'r': 0.702283849918434},
{'a': 0.3879459646692068,
'f1': 0.028587135788894994,
'p': 0.01458513947677087,
'r': 0.715},
{'a': 0.29187568012315207,
'f1': 0.008694359812736866,
'p': 0.0043761220825852785,
'r': 0.6573033707865169},
{'a': 0.33993334640266615,
'f1': 0.035450834347919506,
'p': 0.01823405901204553,
'r': 0.6354300385109114},
{'a': 0.36235252507389537,
'f1': 0.04754716981132075,
'p': 0.024609375,
'r': 0.7},
{'a': 0.3328076124385564,
'f1': 0.042694425586482614,
'p': 0.022069718474615925,
'r': 0.652073732718894},
{'a': 0.25543399402188105,
'f1': 0.02917360375359467,
'p': 0.014926817935413924,
'r': 0.6403654485049833},
{'a': 0.33430256940755443,
'f1': 0.022818681769679064,
'p': 0.011608000334043175,
'r': 0.6666666666666666},
{'a': 0.34148530401615435,
'f1': 0.033939216618512964,
'p': 0.01742264944427756,
'r': 0.6526019690576652},
{'a': 0.25900365924118895,
'f1': 0.013377211727498079,
'p': 0.00676449612068034,
'r': 0.5961904761904762},
{'a': 0.3307106344592563,
'f1': 0.033646888567293774,
'p': 0.01728560276569644,
'r': 0.6292286874154263},
{'a': 0.3483103404632325,
'f1': 0.04197521676031705,
'p': 0.021621621621621623,
'r': 0.7157360406091371},
{'a': 0.32858444253251684,
'f1': 0.018495912403358857,
'p': 0.009391435011269721,
'r': 0.6053268765133172},
{'a': 0.27623870035866727,
'f1': 0.025010931351115002,
'p': 0.012746518105849583,
'r': 0.661271676300578},
{'a': 0.34504122384787683,
'f1': 0.06890243902439025,
'p': 0.036212790200826094,
'r': 0.708217270194986},
{'a': 0.3480917465689039,
'f1': 0.06410256410256411,
'p': 0.03355704697986577,
'r': 0.7142857142857143},
{'a': 0.2616652849254642,
'f1': 0.010938122079500451,
'p': 0.005521238788289051,
'r': 0.5787139689578714}],
'thenation.com;2000': [{'a': 0.9068493150684932,
'f1': 0.9317269076305222,
'p': 0.9586776859504132,
'r': 0.90625},
{'a': 0.7897160399079048,
'f1': 0.811554332874828,
'p': 0.898021308980213,
'r': 0.740276035131744},
{'a': 0.8162251655629139,
'f1': 0.8260188087774296,
'p': 0.831230283911672,
'r': 0.8208722741433022},
{'a': 0.8721227621483376,
'f1': 0.9183303085299456,
'p': 0.9715821812596006,
'r': 0.8706125258086718},
{'a': 0.8639726807057484,
'f1': 0.8755856324830817,
'p': 0.9121475054229935,
'r': 0.8418418418418419},
{'a': 0.8900821225521163,
'f1': 0.9101239669421488,
'p': 0.9254201680672269,
'r': 0.8953252032520326},
{'a': 0.9285890783296269,
'f1': 0.9569748399583148,
'p': 0.9780888618381011,
'r': 0.9367531331973186},
{'a': 0.8420138888888888,
'f1': 0.89935218833939,
'p': 0.9753255654557916,
'r': 0.8343594253884491},
{'a': 0.8539865513928915,
'f1': 0.8933333333333334,
'p': 0.9478778853313478,
'r': 0.8447246184472462},
{'a': 0.7554304102976669,
'f1': 0.7682926829268294,
'p': 0.8857644991212654,
'r': 0.6783310901749664},
{'a': 0.6182608695652174,
'f1': 0.6783882783882783,
'p': 0.929718875502008,
'r': 0.5340253748558247},
{'a': 0.7846251588310038,
'f1': 0.7992895204262878,
'p': 0.872093023255814,
'r': 0.7377049180327869},
{'a': 0.9223057644110275,
'f1': 0.8652173913043478,
'p': 0.8805309734513275,
'r': 0.8504273504273504},
{'a': 0.9228915662650602,
'f1': 0.8632478632478633,
'p': 0.8820960698689956,
'r': 0.8451882845188284},
{'a': 0.8910256410256411,
'f1': 0.9192015209125476,
'p': 0.937015503875969,
'r': 0.9020522388059702},
{'a': 0.8377676833225178,
'f1': 0.8627881448957189,
'p': 0.89419795221843,
'r': 0.8335100742311771},
{'a': 0.8544776119402985,
'f1': 0.8900375939849624,
'p': 0.9348469891411648,
'r': 0.8493273542600897},
{'a': 0.7532351628737171,
'f1': 0.8222436515589844,
'p': 0.9785768936495792,
'r': 0.708980044345898},
{'a': 0.8970684039087948,
'f1': 0.920443101711984,
'p': 0.8969578017664377,
'r': 0.9451913133402275},
{'a': 0.9136400322841001,
'f1': 0.9288090485695276,
'p': 0.9148099606815203,
'r': 0.9432432432432433},
{'a': 0.5854472630173565,
'f1': 0.6901712955263596,
'p': 0.9876249405045217,
'r': 0.5304192229038854},
{'a': 0.6378167641325536,
'f1': 0.7576732458047126,
'p': 0.9846327683615819,
'r': 0.6157433578292821},
{'a': 0.6098265895953757,
'f1': 0.6765175718849841,
'p': 0.9246724890829694,
'r': 0.533375314861461},
{'a': 0.867913148371532,
'f1': 0.8945594607607127,
'p': 0.929,
'r': 0.8625812441968431},
{'a': 0.9187386294724075,
'f1': 0.9411764705882353,
'p': 0.9313640312771503,
'r': 0.9511978704525288}],
'thenation.com;2005': [{'a': 0.8506006006006006,
'f1': 0.885632183908046,
'p': 0.963125,
'r': 0.8196808510638298},
{'a': 0.8158995815899581,
'f1': 0.8,
'p': 0.7829181494661922,
'r': 0.8178438661710037},
{'a': 0.8745432399512789,
'f1': 0.9070397111913358,
'p': 0.9598853868194842,
'r': 0.8597091531223268},
{'a': 0.5586145648312612,
'f1': 0.6243386243386243,
'p': 0.9375709421112373,
'r': 0.4679886685552408},
{'a': 0.7632202052091555,
'f1': 0.7734138972809669,
'p': 0.7409551374819102,
'r': 0.8088467614533965},
{'a': 0.8947784810126582,
'f1': 0.912672357189757,
'p': 0.8887468030690537,
'r': 0.9379217273954116},
{'a': 0.8407185628742515,
'f1': 0.8683168316831682,
'p': 0.9106957424714434,
'r': 0.8297067171239356},
{'a': 0.86843853820598,
'f1': 0.8864678899082569,
'p': 0.8754246885617214,
'r': 0.8977932636469221},
{'a': 0.8272058823529411,
'f1': 0.8555327868852459,
'p': 0.9319196428571429,
'r': 0.790719696969697},
{'a': 0.9087771203155819,
'f1': 0.9196699956578375,
'p': 0.9363395225464191,
'r': 0.9035836177474402},
{'a': 0.6713810316139767,
'f1': 0.7188612099644128,
'p': 0.8603066439522998,
'r': 0.617359413202934},
{'a': 0.7759791122715405,
'f1': 0.8016643550624134,
'p': 0.8993775933609959,
'r': 0.7231025854879066},
{'a': 0.8909657320872274,
'f1': 0.910025706940874,
'p': 0.910025706940874,
'r': 0.910025706940874},
{'a': 0.761386811692726,
'f1': 0.8066115702479338,
'p': 0.8551401869158879,
'r': 0.7632950990615224},
{'a': 0.8468190240889438,
'f1': 0.888689407540395,
'p': 0.9620991253644315,
'r': 0.8256880733944955},
{'a': 0.889849795175239,
'f1': 0.9333333333333333,
'p': 0.97524467472654,
'r': 0.8948758584257792},
{'a': 0.8626292466765141,
'f1': 0.8654124457308249,
'p': 0.9006024096385542,
'r': 0.8328690807799443},
{'a': 0.9197422378441711,
'f1': 0.9370114942528737,
'p': 0.9357208448117539,
'r': 0.9383057090239411},
{'a': 0.8557763061074319,
'f1': 0.8611898016997167,
'p': 0.8397790055248618,
'r': 0.8837209302325582},
{'a': 0.9089718402095612,
'f1': 0.915808600847971,
'p': 0.9174757281553398,
'r': 0.9141475211608222},
{'a': 0.8357265939115451,
'f1': 0.8457389428263214,
'p': 0.8990825688073395,
'r': 0.7983706720977597},
{'a': 0.8567268491555038,
'f1': 0.8724066390041494,
'p': 0.9282560706401766,
'r': 0.8228962818003914},
{'a': 0.8819836785938481,
'f1': 0.8866103739445116,
'p': 0.8729216152019003,
'r': 0.9007352941176471},
{'a': 0.8951011714589989,
'f1': 0.9104138244656662,
'p': 0.9001798561151079,
'r': 0.920883164673413},
{'a': 0.8861493836113126,
'f1': 0.9134031991174849,
'p': 0.9272116461366181,
'r': 0.9}],
'thenation.com;2010': [{'a': 0.7026022304832714,
'f1': 0.6174863387978142,
'p': 0.5400238948626045,
'r': 0.7208931419457735},
{'a': 0.7331054430712596,
'f1': 0.6646845061116617,
'p': 0.5722411831626849,
'r': 0.7927501970055162},
{'a': 0.7065637065637066,
'f1': 0.6156763590391909,
'p': 0.5307901907356948,
'r': 0.7328818660647103},
{'a': 0.7767931320963722,
'f1': 0.6897613548883758,
'p': 0.632768361581921,
'r': 0.7580372250423012},
{'a': 0.7881459710996817,
'f1': 0.7555806725063577,
'p': 0.7092838196286472,
'r': 0.8083434099153567},
{'a': 0.7694665849172287,
'f1': 0.16814159292035397,
'p': 0.12709030100334448,
'r': 0.24836601307189543},
{'a': 0.7654590880699563,
'f1': 0.5839335180055403,
'p': 0.48482060717571296,
'r': 0.733983286908078},
{'a': 0.5516569200779727,
'f1': 0.44888178913738014,
'p': 0.32401268377053905,
'r': 0.7303443794671864},
{'a': 0.8948824343015215,
'f1': 0.9087269815852682,
'p': 0.9149536477226925,
'r': 0.9025844930417495},
{'a': 0.842443729903537,
'f1': 0.7921967769296014,
'p': 0.7915254237288135,
'r': 0.7928692699490663},
{'a': 0.8344968004653869,
'f1': 0.7454138702460851,
'p': 0.7313432835820896,
'r': 0.760036496350365},
{'a': 0.7440562833575934,
'f1': 0.716168953457089,
'p': 0.6150646950092421,
'r': 0.8570508692852543},
{'a': 0.6958211856171039,
'f1': 0.6477605221697051,
'p': 0.5904800984817399,
'r': 0.7173479561316052},
{'a': 0.6659949622166247,
'f1': 0.580379746835443,
'p': 0.46501014198782964,
'r': 0.7718855218855218},
{'a': 0.7730941704035874,
'f1': 0.6820276497695853,
'p': 0.607916355489171,
'r': 0.7767175572519084},
{'a': 0.7362662411670846,
'f1': 0.6057921635434411,
'p': 0.515063731170336,
'r': 0.7353184449958643},
{'a': 0.7998043690903163,
'f1': 0.7036679536679536,
'p': 0.6909952606635071,
'r': 0.7168141592920354},
{'a': 0.8087306145893165,
'f1': 0.7540620384047269,
'p': 0.7501836884643645,
'r': 0.7579806978470676},
{'a': 0.5770092547491475,
'f1': 0.47035862405464746,
'p': 0.3757552134086923,
'r': 0.6286273231170525},
{'a': 0.6311431623931624,
'f1': 0.4140857021637675,
'p': 0.31626701231367466,
'r': 0.5995085995085995},
{'a': 0.5660725261216963,
'f1': 0.471028971028971,
'p': 0.3343971631205674,
'r': 0.7964527027027027},
{'a': 0.838500563697858,
'f1': 0.8115751397566591,
'p': 0.7905188981422165,
'r': 0.8337837837837838},
{'a': 0.6296710673528754,
'f1': 0.5618215514958963,
'p': 0.43112555871596914,
'r': 0.8062310030395137},
{'a': 0.8253839935327405,
'f1': 0.771347918136909,
'p': 0.836906584992343,
'r': 0.7153141361256544},
{'a': 0.7795425667090216,
'f1': 0.6598039215686274,
'p': 0.6046720575022462,
'r': 0.7259978425026968}],
'thenation.com;2015': [{'a': 0.7043414989085617,
'f1': 0.74630593132154,
'p': 0.6378512984702953,
'r': 0.899197592778335},
{'a': 0.6152815013404825,
'f1': 0.5553834237025561,
'p': 0.42052785923753666,
'r': 0.8175598631698974},
{'a': 0.6486733760292772,
'f1': 0.6437847866419295,
'p': 0.5090464547677261,
'r': 0.8755256518082423},
{'a': 0.7672428546979292,
'f1': 0.8310139165009941,
'p': 0.7589650476622787,
'r': 0.9181768259198243},
{'a': 0.671793534932221,
'f1': 0.6988758670174597,
'p': 0.5713727023856081,
'r': 0.8996305418719212},
{'a': 0.6510180995475113,
'f1': 0.6591160220994475,
'p': 0.5361797752808989,
'r': 0.8551971326164874},
{'a': 0.6490370796205807,
'f1': 0.6520376175548589,
'p': 0.5250114731528224,
'r': 0.8601503759398497},
{'a': 0.5519215044971382,
'f1': 0.32345679012345685,
'p': 0.2062992125984252,
'r': 0.7485714285714286},
{'a': 0.6136758070114544,
'f1': 0.5539078156312625,
'p': 0.4140203714799281,
'r': 0.8365617433414043},
{'a': 0.6551724137931034,
'f1': 0.6568332329921734,
'p': 0.5222594542843466,
'r': 0.8848337388483374},
{'a': 0.5384265575261482,
'f1': 0.13763806287170774,
'p': 0.07795957651588066,
'r': 0.5869565217391305},
{'a': 0.6325625593918277,
'f1': 0.5952547103977669,
'p': 0.46383904295812944,
'r': 0.8305744888023369},
{'a': 0.6561322122354306,
'f1': 0.6580161476355247,
'p': 0.5309446254071661,
'r': 0.865049279757392},
{'a': 0.6528825837609418,
'f1': 0.6529873264936632,
'p': 0.5157292659675882,
'r': 0.8898026315789473},
{'a': 0.6559660811629315,
'f1': 0.6567975830815711,
'p': 0.5236030828516378,
'r': 0.8808752025931929},
{'a': 0.5444078947368421,
'f1': 0.29516539440203565,
'p': 0.1886178861788618,
'r': 0.6783625730994152},
{'a': 0.7298712633646083,
'f1': 0.7855905784551437,
'p': 0.6808766136295407,
'r': 0.9283667621776505},
{'a': 0.7792188206906953,
'f1': 0.8464120856063785,
'p': 0.7670659821258794,
'r': 0.9440673999531944},
{'a': 0.7272913676440435,
'f1': 0.7831050228310503,
'p': 0.6955388180764774,
'r': 0.8958955223880597},
{'a': 0.7473245057137674,
'f1': 0.8078355635260036,
'p': 0.7231415164238083,
'r': 0.915},
{'a': 0.6417541692402717,
'f1': 0.6272493573264781,
'p': 0.49643947100712105,
'r': 0.8516579406631762},
{'a': 0.6516439909297053,
'f1': 0.6682860998650473,
'p': 0.5541629364368845,
'r': 0.8416043507817811},
{'a': 0.7412719381015286,
'f1': 0.8026486253058874,
'p': 0.7250975292587777,
'r': 0.8987749838813669},
{'a': 0.5421372719374457,
'f1': 0.24390243902439027,
'p': 0.14847161572052403,
'r': 0.6827309236947792},
{'a': 0.6170431211498973,
'f1': 0.5596221959858323,
'p': 0.4197166469893743,
'r': 0.8394332939787486}],
'www.cnn.com;2000': [{'a': 0.8413491567770144,
'f1': 0.7635009310986964,
'p': 0.7649253731343284,
'r': 0.7620817843866171},
{'a': 0.8187513985231596,
'f1': 0.8204787234042553,
'p': 0.9292168674698795,
'r': 0.7345238095238096},
{'a': 0.8379696013765414,
'f1': 0.8961206104063246,
'p': 0.983454398708636,
'r': 0.8230327592029719},
{'a': 0.7773207990599295,
'f1': 0.6366251198465964,
'p': 0.5561139028475712,
'r': 0.7443946188340808},
{'a': 0.8442677269715043,
'f1': 0.7006369426751592,
'p': 0.6909547738693468,
'r': 0.710594315245478},
{'a': 0.8588469184890656,
'f1': 0.8734402852049911,
'p': 0.9386973180076629,
'r': 0.8166666666666667},
{'a': 0.8427947598253275,
'f1': 0.8239130434782608,
'p': 0.9088729016786571,
'r': 0.7534791252485089},
{'a': 0.830293417922284,
'f1': 0.812280701754386,
'p': 0.904296875,
'r': 0.7372611464968153},
{'a': 0.8313310908217203,
'f1': 0.7936507936507936,
'p': 0.8766233766233766,
'r': 0.7250268528464017},
{'a': 0.8626865671641791,
'f1': 0.8286140089418778,
'p': 0.8286140089418778,
'r': 0.8286140089418778},
{'a': 0.8010358221838585,
'f1': 0.7705326032852164,
'p': 0.7640671273445212,
'r': 0.7771084337349398},
{'a': 0.8652482269503546,
'f1': 0.8425748884639899,
'p': 0.8731836195508587,
'r': 0.8140394088669951},
{'a': 0.7909690512430239,
'f1': 0.7408805031446541,
'p': 0.666289592760181,
'r': 0.8342776203966006},
{'a': 0.8563162970106075,
'f1': 0.8228299643281807,
'p': 0.9389416553595658,
'r': 0.7322751322751323},
{'a': 0.856437523737182,
'f1': 0.8700137551581842,
'p': 0.9322033898305084,
'r': 0.8156028368794326},
{'a': 0.8412787436904094,
'f1': 0.7342723004694836,
'p': 0.7057761732851986,
'r': 0.7651663405088063},
{'a': 0.8636363636363636,
'f1': 0.8282685512367491,
'p': 0.8798798798798799,
'r': 0.7823765020026703},
{'a': 0.7565043362241495,
'f1': 0.48951048951048953,
'p': 0.35714285714285715,
'r': 0.7777777777777778},
{'a': 0.8438485804416404,
'f1': 0.8075178224238496,
'p': 0.8154450261780105,
'r': 0.7997432605905006},
{'a': 0.8397755610972568,
'f1': 0.7905460472697636,
'p': 0.739329268292683,
'r': 0.8493870402802102},
{'a': 0.8527443105756358,
'f1': 0.7349397590361446,
'p': 0.664488017429194,
'r': 0.8221024258760108},
{'a': 0.8501106194690266,
'f1': 0.864567716141929,
'p': 0.8871794871794871,
'r': 0.8430799220272904},
{'a': 0.8918629550321199,
'f1': 0.9126297577854673,
'p': 0.9173913043478261,
'r': 0.9079173838209983},
{'a': 0.8393980233602875,
'f1': 0.8514440058175774,
'p': 0.9499304589707928,
'r': 0.771460843373494},
{'a': 0.7984886649874056,
'f1': 0.7307692307692308,
'p': 0.892018779342723,
'r': 0.6188925081433225}],
'www.cnn.com;2005': [{'a': 0.7973986993496749,
'f1': 0.6505608283002589,
'p': 0.5568685376661743,
'r': 0.7821576763485477},
{'a': 0.7673627974745022,
'f1': 0.6335118592195869,
'p': 0.5130111524163569,
'r': 0.828},
{'a': 0.8120333772507685,
'f1': 0.7855711422845693,
'p': 0.7101449275362319,
'r': 0.8789237668161435},
{'a': 0.7939339875111507,
'f1': 0.7072243346007605,
'p': 0.6421173762945915,
'r': 0.7870239774330042},
{'a': 0.7925133689839572,
'f1': 0.6040816326530613,
'p': 0.4860426929392447,
'r': 0.7978436657681941},
{'a': 0.7999199679871949,
'f1': 0.7494989979959921,
'p': 0.6900369003690037,
'r': 0.8201754385964912},
{'a': 0.7803790412486065,
'f1': 0.6317757009345795,
'p': 0.5113464447806354,
'r': 0.8264058679706602},
{'a': 0.7876530069185738,
'f1': 0.6084396467124632,
'p': 0.496,
'r': 0.7868020304568528},
{'a': 0.8035569927243331,
'f1': 0.7892454466608847,
'p': 0.7350565428109854,
'r': 0.8520599250936329},
{'a': 0.8097345132743363,
'f1': 0.5338753387533874,
'p': 0.4227467811158798,
'r': 0.7242647058823529},
{'a': 0.8237082066869301,
'f1': 0.7810140237324704,
'p': 0.7105004906771345,
'r': 0.8670658682634731},
{'a': 0.7990867579908676,
'f1': 0.6360294117647058,
'p': 0.5397815912636506,
'r': 0.7740492170022372},
{'a': 0.7872141857209519,
'f1': 0.6632200886262923,
'p': 0.5931307793923382,
'r': 0.7520938023450586},
{'a': 0.8526021655606008,
'f1': 0.8223905723905724,
'p': 0.803453947368421,
'r': 0.8422413793103448},
{'a': 0.8202293202293203,
'f1': 0.7898516036381045,
'p': 0.7313829787234043,
'r': 0.858480749219563},
{'a': 0.7980817768803634,
'f1': 0.6563573883161512,
'p': 0.5568513119533528,
'r': 0.799163179916318},
{'a': 0.8609066253390159,
'f1': 0.8,
'p': 0.7597883597883598,
'r': 0.8447058823529412},
{'a': 0.7880165289256198,
'f1': 0.7170435741864314,
'p': 0.639763779527559,
'r': 0.8155583437892095},
{'a': 0.7800707547169812,
'f1': 0.6166495375128469,
'p': 0.4942339373970346,
'r': 0.819672131147541},
{'a': 0.7733631785114717,
'f1': 0.5263157894736842,
'p': 0.39893617021276595,
'r': 0.7731958762886598},
{'a': 0.7998345740281224,
'f1': 0.7476538060479666,
'p': 0.6867816091954023,
'r': 0.8203661327231121},
{'a': 0.8050885960926851,
'f1': 0.7628524046434495,
'p': 0.6948640483383686,
'r': 0.8455882352941176},
{'a': 0.8122946973251994,
'f1': 0.726027397260274,
'p': 0.6354916067146283,
'r': 0.8466453674121406},
{'a': 0.8142235123367199,
'f1': 0.6531165311653115,
'p': 0.5540229885057472,
'r': 0.7953795379537953},
{'a': 0.819165870741802,
'f1': 0.8326458456098998,
'p': 0.78369384359401,
'r': 0.8881206788183532}],
'www.cnn.com;2010': [{'a': 0.710801393728223,
'f1': 0.7120782087669505,
'p': 0.6169398907103825,
'r': 0.8419090231170768},
{'a': 0.5952914798206278,
'f1': 0.4946336910872608,
'p': 0.36856745479833103,
'r': 0.75177304964539},
{'a': 0.7109103544127866,
'f1': 0.6938925680647535,
'p': 0.609961190168176,
'r': 0.8046075085324232},
{'a': 0.6587466587466587,
'f1': 0.6996078431372549,
'p': 0.5735105015002143,
'r': 0.8967828418230563},
{'a': 0.5922480620155038,
'f1': 0.2563619227144204,
'p': 0.15832363213038417,
'r': 0.6732673267326733},
{'a': 0.614507299270073,
'f1': 0.4332662642521798,
'p': 0.3052930056710775,
'r': 0.745958429561201},
{'a': 0.47268408551068886,
'f1': 0.389630793401414,
'p': 0.2607781282860147,
'r': 0.7701863354037267},
{'a': 0.6675623800383877,
'f1': 0.6077898550724637,
'p': 0.49157509157509155,
'r': 0.7959667852906287},
{'a': 0.6402390438247012,
'f1': 0.6072205306655067,
'p': 0.48913805185704273,
'r': 0.8004587155963303},
{'a': 0.6974099810486418,
'f1': 0.721835075493612,
'p': 0.6306443429731101,
'r': 0.8438560760353021},
{'a': 0.5798245614035088,
'f1': 0.36133333333333334,
'p': 0.237928007023705,
'r': 0.7506925207756233},
{'a': 0.5956759443339961,
'f1': 0.575306708431219,
'p': 0.44561261625556003,
'r': 0.8114874815905744},
{'a': 0.5898835516739447,
'f1': 0.5595935912465807,
'p': 0.4236686390532544,
'r': 0.8239355581127733},
{'a': 0.6405244338498212,
'f1': 0.6818565400843882,
'p': 0.5543739279588337,
'r': 0.8854794520547945},
{'a': 0.7779168753129695,
'f1': 0.2898318654923939,
'p': 0.19535887749595252,
'r': 0.5612403100775194},
{'a': 0.5560729530461778,
'f1': 0.4291417165668662,
'p': 0.2935153583617747,
'r': 0.7977736549165121},
{'a': 0.6612617924528302,
'f1': 0.6969137430757056,
'p': 0.5868502887605509,
'r': 0.8577922077922078},
{'a': 0.5782380013149244,
'f1': 0.5249907441688263,
'p': 0.3908489525909592,
'r': 0.7993235625704622},
{'a': 0.7620603015075377,
'f1': 0.8,
'p': 0.7524831148192292,
'r': 0.8539224526600541},
{'a': 0.5506138055946871,
'f1': 0.42045159615883726,
'p': 0.2919971160778659,
'r': 0.7506950880444856},
{'a': 0.611155034859484,
'f1': 0.137711189034109,
'p': 0.07929515418502203,
'r': 0.5230024213075061},
{'a': 0.6647457627118644,
'f1': 0.6843281200127673,
'p': 0.5651027938850817,
'r': 0.8673139158576052},
{'a': 0.6106776180698152,
'f1': 0.5123456790123456,
'p': 0.37613293051359514,
'r': 0.8032258064516129},
{'a': 0.680296200901481,
'f1': 0.6671136439825679,
'p': 0.558361391694725,
'r': 0.8284762697751873},
{'a': 0.5677879714576962,
'f1': 0.5189107413010589,
'p': 0.3775454045129334,
'r': 0.8295042321644498}],
'www.cnn.com;2015': [{'a': 0.5492653414001729,
'f1': 0.6149870801033592,
'p': 0.47928653624856155,
'r': 0.8578784757981462},
{'a': 0.3722879684418146,
'f1': 0.3455012853470437,
'p': 0.2334954829742877,
'r': 0.6640316205533597},
{'a': 0.41245136186770426,
'f1': 0.34143302180685353,
'p': 0.2202572347266881,
'r': 0.7590027700831025},
{'a': 0.24157303370786518,
'f1': 0.2989036353144836,
'p': 0.18057169416686034,
'r': 0.8671875},
{'a': 0.3888649115235218,
'f1': 0.3667262969588551,
'p': 0.24550898203592814,
'r': 0.7243816254416962},
{'a': 0.4935960591133005,
'f1': 0.5155513666352497,
'p': 0.37465753424657533,
'r': 0.8262839879154078},
{'a': 0.4507429829389103,
'f1': 0.44432071269487744,
'p': 0.3066871637202152,
'r': 0.806060606060606},
{'a': 0.5464098073555166,
'f1': 0.5924468922108576,
'p': 0.45334136062612884,
'r': 0.8547105561861521},
{'a': 0.40460251046025103,
'f1': 0.31717850287907867,
'p': 0.20388648982109808,
'r': 0.7138228941684666},
{'a': 0.10986929342678538,
'f1': 0.07372363493002167,
'p': 0.03895021870443657,
'r': 0.6875},
{'a': 0.33683729433272397,
'f1': 0.342546443135478,
'p': 0.21862348178137653,
'r': 0.7907949790794979},
{'a': 0.2972493345164153,
'f1': 0.21739130434782608,
'p': 0.12746234067207415,
'r': 0.738255033557047},
{'a': 0.5474281897127589,
'f1': 0.5511758860549851,
'p': 0.40924741760944416,
'r': 0.8438133874239351},
{'a': 0.6363953894516242,
'f1': 0.7274155538098979,
'p': 0.6044386422976501,
'r': 0.9132149901380671},
{'a': 0.5831210191082803,
'f1': 0.6378976486860305,
'p': 0.5184352517985612,
'r': 0.8289000718907261},
{'a': 0.5144092219020173,
'f1': 0.5644118914261094,
'p': 0.4166666666666667,
'r': 0.8744993324432577},
{'a': 0.207538496897265,
'f1': 0.22863534675615213,
'p': 0.1328653146125845,
'r': 0.8189102564102564},
{'a': 0.4170161773517076,
'f1': 0.364467668190725,
'p': 0.23684210526315788,
'r': 0.7903682719546742},
{'a': 0.5291946308724832,
'f1': 0.5296681193429433,
'p': 0.39638735574510786,
'r': 0.797979797979798},
{'a': 0.30409055827116027,
'f1': 0.3719526352449501,
'p': 0.23782660332541566,
'r': 0.853035143769968},
{'a': 0.3800991231414411,
'f1': 0.27280858676207514,
'p': 0.1724137931034483,
'r': 0.6531049250535332},
{'a': 0.6251548946716233,
'f1': 0.7015293537247164,
'p': 0.5764085934333198,
'r': 0.8960302457466919},
{'a': 0.4566527584608252,
'f1': 0.3908523908523909,
'p': 0.2651622002820874,
'r': 0.7430830039525692},
{'a': 0.5819335572974991,
'f1': 0.6654719235364397,
'p': 0.5330143540669856,
'r': 0.8855325914149443},
{'a': 0.47456100855470507,
'f1': 0.40731335703402743,
'p': 0.2748457847840987,
'r': 0.7862745098039216}],
'www.esquire.com;2000': [{'a': 0.9099462365591398,
'f1': 0.8632653061224489,
'p': 0.94,
'r': 0.7981132075471699},
{'a': 0.9625829812914907,
'f1': 0.9585006693440428,
'p': 0.9636608344549125,
'r': 0.9533954727030626},
{'a': 0.9491106719367589,
'f1': 0.9352608422375865,
'p': 0.9649805447470817,
'r': 0.9073170731707317},
{'a': 0.9598155467720685,
'f1': 0.9318435754189943,
'p': 0.9391891891891891,
'r': 0.9246119733924612},
{'a': 0.9655007187350264,
'f1': 0.9644268774703557,
'p': 0.9730807577268196,
'r': 0.9559255631733594},
{'a': 0.9618320610687023,
'f1': 0.9403578528827038,
'p': 0.946,
'r': 0.9347826086956522},
{'a': 0.9019836639439907,
'f1': 0.7754010695187165,
'p': 0.9177215189873418,
'r': 0.6712962962962963},
{'a': 0.9437291368621841,
'f1': 0.9423264907135875,
'p': 0.9698189134808853,
'r': 0.9163498098859315},
{'a': 0.8920653442240374,
'f1': 0.7964796479647965,
'p': 0.9305912596401028,
'r': 0.6961538461538461},
{'a': 0.9524975514201763,
'f1': 0.9536992840095465,
'p': 0.9736842105263158,
'r': 0.9345182413470533},
{'a': 0.9400584795321637,
'f1': 0.9076576576576577,
'p': 0.9372093023255814,
'r': 0.8799126637554585},
{'a': 0.9348327566320646,
'f1': 0.9060681629260183,
'p': 0.922165820642978,
'r': 0.8905228758169934},
{'a': 0.949874686716792,
'f1': 0.9445676274944568,
'p': 0.9692832764505119,
'r': 0.9210810810810811},
{'a': 0.91316685584563,
'f1': 0.8555240793201133,
'p': 0.9114688128772636,
'r': 0.806049822064057},
{'a': 0.9504480759093306,
'f1': 0.9304733727810651,
'p': 0.9588414634146342,
'r': 0.9037356321839081},
{'a': 0.958079268292683,
'f1': 0.9306431273644389,
'p': 0.929471032745592,
'r': 0.9318181818181818},
{'a': 0.9499749874937469,
'f1': 0.9528746465598492,
'p': 0.9674641148325359,
'r': 0.9387186629526463},
{'a': 0.950109649122807,
'f1': 0.9334308705193854,
'p': 0.9579579579579579,
'r': 0.9101283880171184},
{'a': 0.9122310305775765,
'f1': 0.8496605237633366,
'p': 0.8358778625954199,
'r': 0.863905325443787},
{'a': 0.8981132075471698,
'f1': 0.6954887218045113,
'p': 0.6928838951310862,
'r': 0.6981132075471698},
{'a': 0.9553314121037464,
'f1': 0.9345991561181434,
'p': 0.9425531914893617,
'r': 0.9267782426778243},
{'a': 0.8962962962962963,
'f1': 0.8827319587628867,
'p': 0.9607293127629734,
'r': 0.8164481525625745},
{'a': 0.9317571198280494,
'f1': 0.9070958302852964,
'p': 0.9064327485380117,
'r': 0.9077598828696926},
{'a': 0.3333333333333333,
'f1': 0.125,
'p': 0.07692307692307693,
'r': 0.3333333333333333},
{'a': 0.9076396807297605,
'f1': 0.8430232558139535,
'p': 0.9081419624217119,
'r': 0.786618444846293}],
'www.esquire.com;2005': [{'a': 0.9530398322851154,
'f1': 0.9464114832535886,
'p': 0.9611273080660836,
'r': 0.9321394910461829},
{'a': 0.9524819686041578,
'f1': 0.9445544554455446,
'p': 0.9578313253012049,
'r': 0.931640625},
{'a': 0.9585714285714285,
'f1': 0.9390329362298528,
'p': 0.938375350140056,
'r': 0.9396914446002805},
{'a': 0.9636363636363636,
'f1': 0.9542857142857142,
'p': 0.9553775743707094,
'r': 0.9531963470319634},
{'a': 0.959656652360515,
'f1': 0.9529058116232465,
'p': 0.9606060606060606,
'r': 0.9453280318091452},
{'a': 0.9305689488910318,
'f1': 0.8959537572254336,
'p': 0.8908045977011494,
'r': 0.9011627906976745},
{'a': 0.9037974683544304,
'f1': 0.915929203539823,
'p': 0.8739445114595898,
'r': 0.9621513944223108},
{'a': 0.9250585480093677,
'f1': 0.9111933395004626,
'p': 0.8946412352406903,
'r': 0.9283694627709708},
{'a': 0.9591222030981067,
'f1': 0.9522373051784816,
'p': 0.958502024291498,
'r': 0.9460539460539461},
{'a': 0.8173076923076923,
'f1': 0.7184191954834157,
'p': 0.9187725631768953,
'r': 0.589803012746234},
{'a': 0.948019801980198,
'f1': 0.9354508196721312,
'p': 0.9520333680917622,
'r': 0.919436052366566},
{'a': 0.9205128205128205,
'f1': 0.8671808054841474,
'p': 0.8419301164725458,
'r': 0.8939929328621908},
{'a': 0.9147245762711864,
'f1': 0.8444444444444443,
'p': 0.7817531305903399,
'r': 0.9180672268907563},
{'a': 0.9523595505617978,
'f1': 0.9440928270042194,
'p': 0.9582441113490364,
'r': 0.9303534303534303},
{'a': 0.9564459930313589,
'f1': 0.9479166666666666,
'p': 0.9479166666666666,
'r': 0.9479166666666666},
{'a': 0.8853608247422681,
'f1': 0.8591691995947315,
'p': 0.8037914691943128,
'r': 0.9227421109902068},
{'a': 0.945006599208095,
'f1': 0.9323226854358421,
'p': 0.9566666666666667,
'r': 0.9091869060190074},
{'a': 0.9640317858636553,
'f1': 0.9595484477892757,
'p': 0.9622641509433962,
'r': 0.9568480300187617},
{'a': 0.9663256606990622,
'f1': 0.9613313754282917,
'p': 0.958984375,
'r': 0.9636898920510304},
{'a': 0.9699303263659699,
'f1': 0.9707142857142858,
'p': 0.9714081486776269,
'r': 0.9700214132762313},
{'a': 0.933728981206726,
'f1': 0.900445765230312,
'p': 0.9366306027820711,
'r': 0.8669527896995708},
{'a': 0.9495934959349593,
'f1': 0.9435850773430392,
'p': 0.9418710263396912,
'r': 0.9453053783044667},
{'a': 0.9541052631578948,
'f1': 0.9472665699080792,
'p': 0.9616895874263262,
'r': 0.9332697807435653},
{'a': 0.9520983467571005,
'f1': 0.9443623830625307,
'p': 0.95995995995996,
'r': 0.9292635658914729},
{'a': 0.9658650116369278,
'f1': 0.9645732689210951,
'p': 0.9684721099434115,
'r': 0.9607056936647955}],
'www.esquire.com;2010': [{'a': 0.64679015478305,
'f1': 0.48558758314855877,
'p': 0.36479733481399224,
'r': 0.7259668508287292},
{'a': 0.7274867569158329,
'f1': 0.7477755583802433,
'p': 0.6728758169934641,
'r': 0.8414384961176952},
{'a': 0.5506059613494924,
'f1': 0.2502732240437158,
'p': 0.15055884286653518,
'r': 0.7411003236245954},
{'a': 0.6553830491219139,
'f1': 0.593881223755249,
'p': 0.4773384763741562,
'r': 0.7857142857142857},
{'a': 0.6777251184834123,
'f1': 0.6300114547537229,
'p': 0.5205868433506863,
'r': 0.7976794778825236},
{'a': 0.6317965023847377,
'f1': 0.4688073394495412,
'p': 0.3429530201342282,
'r': 0.7405797101449275},
{'a': 0.5648336727766463,
'f1': 0.34857723577235766,
'p': 0.22730284956925115,
'r': 0.7472766884531591},
{'a': 0.6060495626822158,
'f1': 0.5745769382133019,
'p': 0.41785918717801945,
'r': 0.9193954659949622},
{'a': 0.6417910447761194,
'f1': 0.5782983970406906,
'p': 0.45096153846153847,
'r': 0.8058419243986255},
{'a': 0.6861799816345271,
'f1': 0.6510084248149094,
'p': 0.556525534701004,
'r': 0.7841328413284133},
{'a': 0.6369239013933548,
'f1': 0.5042078302231979,
'p': 0.3982658959537572,
'r': 0.6869391824526421},
{'a': 0.6477832512315271,
'f1': 0.545068928950159,
'p': 0.4341216216216216,
'r': 0.7321937321937322},
{'a': 0.6986740804106074,
'f1': 0.682156553124295,
'p': 0.5990491283676703,
'r': 0.7920377160817181},
{'a': 0.5698198198198198,
'f1': 0.3481228668941979,
'p': 0.224105461393597,
'r': 0.7794759825327511},
{'a': 0.6377635631366975,
'f1': 0.5682010731431799,
'p': 0.5040080160320641,
'r': 0.6511326860841424},
{'a': 0.5783648696893966,
'f1': 0.2365869424692954,
'p': 0.14186046511627906,
'r': 0.7120622568093385},
{'a': 0.5617977528089888,
'f1': 0.22413793103448276,
'p': 0.13391442155309033,
'r': 0.6869918699186992},
{'a': 0.6622045001424096,
'f1': 0.5662033650329188,
'p': 0.4363021420518602,
'r': 0.80625},
{'a': 0.6687982946975752,
'f1': 0.6065210509654954,
'p': 0.49229188078108943,
'r': 0.7897774113767518},
{'a': 0.6808140977910151,
'f1': 0.6742654508611956,
'p': 0.5649405772495756,
'r': 0.8360552763819096},
{'a': 0.539688041594454,
'f1': 0.30977130977130973,
'p': 0.19618169848584596,
'r': 0.7358024691358025},
{'a': 0.5266207895689967,
'f1': 0.3021890016017085,
'p': 0.18891855807743657,
'r': 0.7546666666666667},
{'a': 0.7530434782608696,
'f1': 0.7879893828798938,
'p': 0.7028706717963895,
'r': 0.8965647414118535},
{'a': 0.6745463228271251,
'f1': 0.6431002880335166,
'p': 0.5404929577464789,
'r': 0.7937944408532643},
{'a': 0.6468172484599589,
'f1': 0.6351515151515151,
'p': 0.5314401622718052,
'r': 0.7891566265060241}],
'www.esquire.com;2015': [{'a': 0.28155849110591824,
'f1': 0.01374795417348609,
'p': 0.006949500297835727,
'r': 0.6325301204819277},
{'a': 0.2862649243207671,
'f1': 0.03360488798370672,
'p': 0.017213275086392383,
'r': 0.704},
{'a': 0.2782460460270859,
'f1': 0.014355923435075012,
'p': 0.007253479709860812,
'r': 0.6894409937888198},
{'a': 0.4640900791235545,
'f1': 0.0804177545691906,
'p': 0.04265927977839335,
'r': 0.7},
{'a': 0.31307550644567217,
'f1': 0.13063498379853336,
'p': 0.07100043255267874,
'r': 0.8160511363636364},
{'a': 0.3294219904389396,
'f1': 0.18686762225969644,
'p': 0.10501066098081023,
'r': 0.8475143403441683},
{'a': 0.28933986686390534,
'f1': 0.06061717079132294,
'p': 0.03159034456404051,
'r': 0.7469879518072289},
{'a': 0.2862270923861704,
'f1': 0.031778575089697586,
'p': 0.01626122877188381,
'r': 0.6946778711484594},
{'a': 0.41799908634079486,
'f1': 0.18210999358014124,
'p': 0.10316401988119772,
'r': 0.7757520510483136},
{'a': 0.46510435516950177,
'f1': 0.48832982314416784,
'p': 0.3633436373003606,
'r': 0.7443926466707714},
{'a': 0.29096427737567127,
'f1': 0.05076269067266818,
'p': 0.0262936338320057,
'r': 0.7315315315315315},
{'a': 0.42605054583520263,
'f1': 0.22165889271952952,
'p': 0.12886111766092903,
'r': 0.7920289855072464},
{'a': 0.28935412861224297,
'f1': 0.04506262193970671,
'p': 0.023255813953488372,
'r': 0.7232323232323232},
{'a': 0.3115160810140286,
'f1': 0.11646888567293778,
'p': 0.06279650436953808,
'r': 0.801593625498008},
{'a': 0.30227762247376383,
'f1': 0.08454091756358606,
'p': 0.04462075531577277,
'r': 0.8025114155251142},
{'a': 0.3945309412787481,
'f1': 0.10113809691423206,
'p': 0.054377996467322734,
'r': 0.7219430485762144},
{'a': 0.29112559186160986,
'f1': 0.051796576158525114,
'p': 0.02680773724522913,
'r': 0.7634011090573013},
{'a': 0.28649540419514496,
'f1': 0.02411192057249694,
'p': 0.012267121490422462,
'r': 0.700374531835206},
{'a': 0.40090591226954864,
'f1': 0.09766606822262118,
'p': 0.05230098705294193,
'r': 0.7364620938628159},
{'a': 0.3224801439290886,
'f1': 0.16432128166269758,
'p': 0.0910126506385275,
'r': 0.8447412353923205},
{'a': 0.2870174448676353,
'f1': 0.022939622398350408,
'p': 0.01166983544220809,
'r': 0.6691729323308271},
{'a': 0.4520123839009288,
'f1': 0.31479852190744323,
'p': 0.19485894782703408,
'r': 0.8187643020594966},
{'a': 0.33249523148950927,
'f1': 0.17999786984769411,
'p': 0.1014832162373146,
'r': 0.7952941176470588},
{'a': 0.32776109007008836,
'f1': 0.1710328537685205,
'p': 0.09559529524723956,
'r': 0.8110997963340122},
{'a': 0.3882952889752307,
'f1': 0.05855238569826834,
'p': 0.03059099192918511,
'r': 0.6811594202898551},
{'a': 0.28659309697111995,
'f1': 0.034447692894368886,
'p': 0.01765127336676871,
'r': 0.7112860892388452}],
'www.forbes.com;2000': [{'a': 0.7443408788282291,
'f1': 0.7538461538461539,
'p': 0.6689419795221843,
'r': 0.8634361233480177},
{'a': 0.6693083573487032,
'f1': 0.6602516654330126,
'p': 0.5792207792207792,
'r': 0.7676419965576592},
{'a': 0.8668122270742358,
'f1': 0.9148214641930981,
'p': 0.8782075833014171,
'r': 0.9546211490424646},
{'a': 0.7689606741573034,
'f1': 0.7932118164676305,
'p': 0.6806903991370011,
'r': 0.9503012048192772},
{'a': 0.6996557659208261,
'f1': 0.5389696169088507,
'p': 0.3923076923076923,
'r': 0.8607594936708861},
{'a': 0.7552631578947369,
'f1': 0.757496740547588,
'p': 0.6579841449603624,
'r': 0.8924731182795699},
{'a': 0.7659005879208979,
'f1': 0.8153456998313658,
'p': 0.765637371338084,
'r': 0.8719567177637512},
{'a': 0.7423220973782771,
'f1': 0.7059829059829059,
'p': 0.5760111576011158,
'r': 0.9116997792494481},
{'a': 0.7751817237798546,
'f1': 0.8257545271629778,
'p': 0.7691154422788605,
'r': 0.891398783666377},
{'a': 0.8014457831325301,
'f1': 0.8536931818181818,
'p': 0.9677938808373591,
'r': 0.7636594663278272},
{'a': 0.75,
'f1': 0.7416545718432511,
'p': 0.6231707317073171,
'r': 0.9157706093189965},
{'a': 0.8712945590994371,
'f1': 0.9159107624417749,
'p': 0.8745318352059925,
'r': 0.9613998970663922},
{'a': 0.7123287671232876,
'f1': 0.661611374407583,
'p': 0.5394126738794436,
'r': 0.8553921568627451},
{'a': 0.7142857142857143,
'f1': 0.6728187919463088,
'p': 0.5679886685552408,
'r': 0.8251028806584362},
{'a': 0.6585127201565558,
'f1': 0.48296296296296304,
'p': 0.346072186836518,
'r': 0.7990196078431373},
{'a': 0.751685393258427,
'f1': 0.7868852459016393,
'p': 0.7311827956989247,
'r': 0.8517745302713987},
{'a': 0.7472584856396867,
'f1': 0.7945670628183361,
'p': 0.7458167330677291,
'r': 0.8501362397820164},
{'a': 0.7349081364829396,
'f1': 0.7758544163337772,
'p': 0.7444633730834753,
'r': 0.8100092678405931},
{'a': 0.780862374483166,
'f1': 0.8064684402712573,
'p': 0.7104779411764706,
'r': 0.9324487334137516},
{'a': 0.6510926902788244,
'f1': 0.33381294964028774,
'p': 0.2140221402214022,
'r': 0.7581699346405228},
{'a': 0.8969258589511754,
'f1': 0.8155339805825242,
'p': 0.7325581395348837,
'r': 0.9197080291970803},
{'a': 0.7054992764109985,
'f1': 0.6588432523051132,
'p': 0.5662824207492796,
'r': 0.7875751503006012},
{'a': 0.7934306569343066,
'f1': 0.817301484828922,
'p': 0.7412177985948478,
'r': 0.9107913669064748},
{'a': 0.7213793103448276,
'f1': 0.7259158751696067,
'p': 0.6286721504112809,
'r': 0.8587479935794543},
{'a': 0.795525170913611,
'f1': 0.8621123218776194,
'p': 0.8686655405405406,
'r': 0.8556572379367721},
{'a': 0.6799459824442944,
'f1': 0.6901960784313725,
'p': 0.6330935251798561,
'r': 0.7586206896551724},
{'a': 0.733142037302726,
'f1': 0.7138461538461538,
'p': 0.5895806861499364,
'r': 0.9044834307992202},
{'a': 0.8016129032258065,
'f1': 0.8367978770455551,
'p': 0.9574898785425101,
'r': 0.7431264728986646}],
'www.forbes.com;2005': [{'a': 0.9115621602029721,
'f1': 0.5563636363636364,
'p': 0.44868035190615835,
'r': 0.7320574162679426},
{'a': 0.8734510941207487,
'f1': 0.7993311036789298,
'p': 0.802013422818792,
'r': 0.7966666666666666},
{'a': 0.9065387348969438,
'f1': 0.6649681528662421,
'p': 0.5945330296127562,
'r': 0.7543352601156069},
{'a': 0.8789716926632004,
'f1': 0.7067879636109169,
'p': 0.6242274412855378,
'r': 0.8145161290322581},
{'a': 0.9202395964691047,
'f1': 0.8188976377952756,
'p': 0.7636849132176236,
'r': 0.8827160493827161},
{'a': 0.931665062560154,
'f1': 0.841870824053452,
'p': 0.8181818181818182,
'r': 0.8669724770642202},
{'a': 0.902405498281787,
'f1': 0.6253298153034301,
'p': 0.5302013422818792,
'r': 0.7620578778135049},
{'a': 0.8721075672295184,
'f1': 0.6666666666666666,
'p': 0.6586151368760065,
'r': 0.6749174917491749},
{'a': 0.9332394366197183,
'f1': 0.8892005610098177,
'p': 0.879740980573543,
'r': 0.8988657844990549},
{'a': 0.8610260336906586,
'f1': 0.032,
'p': 0.019543973941368076,
'r': 0.08823529411764706},
{'a': 0.8955849178214631,
'f1': 0.3493975903614458,
'p': 0.4027777777777778,
'r': 0.30851063829787234},
{'a': 0.9279538904899135,
'f1': 0.6825396825396826,
'p': 0.617816091954023,
'r': 0.7624113475177305},
{'a': 0.9144486692015209,
'f1': 0.5202558635394456,
'p': 0.4178082191780822,
'r': 0.6892655367231638},
{'a': 0.8951814768460575,
'f1': 0.7823261858349578,
'p': 0.7332521315468941,
'r': 0.8384401114206128},
{'a': 0.9050140143257552,
'f1': 0.7874564459930313,
'p': 0.7624831309041835,
'r': 0.8141210374639769},
{'a': 0.9185655584609638,
'f1': 0.5947955390334572,
'p': 0.5369127516778524,
'r': 0.6666666666666666},
{'a': 0.9252059308072488,
'f1': 0.8319763138415989,
'p': 0.8168604651162791,
'r': 0.8476621417797888},
{'a': 0.9274785801713586,
'f1': 0.8657223796033995,
'p': 0.854586129753915,
'r': 0.8771526980482205},
{'a': 0.8768613974799542,
'f1': 0.7393939393939394,
'p': 0.8026315789473685,
'r': 0.6853932584269663},
{'a': 0.918423464711274,
'f1': 0.8285163776493256,
'p': 0.8311855670103093,
'r': 0.8258642765685019},
{'a': 0.8763347405016141,
'f1': 0.7802294792586054,
'p': 0.7198697068403909,
'r': 0.8516377649325626},
{'a': 0.8730208993033566,
'f1': 0.5844559585492228,
'p': 0.6635294117647059,
'r': 0.5222222222222223},
{'a': 0.918977202711029,
'f1': 0.8101083032490974,
'p': 0.7912552891396333,
'r': 0.8298816568047337},
{'a': 0.8953338489301367,
'f1': 0.8405341712490181,
'p': 0.8946488294314381,
'r': 0.7925925925925926},
{'a': 0.8788853161843515,
'f1': 0.7638453500522466,
'p': 0.8297389330306469,
'r': 0.707647628267183}],
'www.forbes.com;2010': [{'a': 0.8211586901763224,
'f1': 0.5608247422680412,
'p': 0.4594594594594595,
'r': 0.7195767195767195},
{'a': 0.8101694915254237,
'f1': 0.7941176470588235,
'p': 0.816414686825054,
'r': 0.7730061349693251},
{'a': 0.780921052631579,
'f1': 0.6400000000000001,
'p': 0.6420824295010846,
'r': 0.6379310344827587},
{'a': 0.8728476821192053,
'f1': 0.8248175182481752,
'p': 0.7545909849749582,
'r': 0.9094567404426559},
{'a': 0.8429137760158573,
'f1': 0.840943301555444,
'p': 0.8430583501006036,
'r': 0.8388388388388388},
{'a': 0.8130601792573624,
'f1': 0.7321100917431193,
'p': 0.726775956284153,
'r': 0.7375231053604436},
{'a': 0.7957957957957958,
'f1': 0.6755725190839693,
'p': 0.6082474226804123,
'r': 0.759656652360515},
{'a': 0.8321249302844395,
'f1': 0.8143121529919802,
'p': 0.7317073170731707,
'r': 0.9179415855354659},
{'a': 0.8390871854885898,
'f1': 0.8028673835125447,
'p': 0.7598371777476255,
'r': 0.851063829787234},
{'a': 0.8219832735961768,
'f1': 0.734402852049911,
'p': 0.6467817896389325,
'r': 0.8494845360824742},
{'a': 0.8094707520891364,
'f1': 0.7802056555269923,
'p': 0.7683544303797468,
'r': 0.7924281984334204},
{'a': 0.8355367530407192,
'f1': 0.8243929983060417,
'p': 0.7995618838992333,
'r': 0.8508158508158508},
{'a': 0.797940797940798,
'f1': 0.7043314500941619,
'p': 0.6887661141804788,
'r': 0.720616570327553},
{'a': 0.7769452449567723,
'f1': 0.7006960556844547,
'p': 0.7401960784313726,
'r': 0.6651982378854625},
{'a': 0.8239182692307693,
'f1': 0.7650360866078589,
'p': 0.6943231441048034,
'r': 0.8517857142857143},
{'a': 0.8310967144840352,
'f1': 0.8142493638676845,
'p': 0.7797270955165692,
'r': 0.8519701810436635},
{'a': 0.7862295081967213,
'f1': 0.6693711967545639,
'p': 0.6639839034205232,
'r': 0.6748466257668712},
{'a': 0.8256203890006707,
'f1': 0.7560975609756098,
'p': 0.7158081705150977,
'r': 0.8011928429423459},
{'a': 0.8265830005704506,
'f1': 0.7342657342657343,
'p': 0.6552262090483619,
'r': 0.8349900596421471},
{'a': 0.8095238095238095,
'f1': 0.7272727272727272,
'p': 0.6571879936808847,
'r': 0.8140900195694716},
{'a': 0.7735124760076776,
'f1': 0.681081081081081,
'p': 0.7132075471698113,
'r': 0.6517241379310345},
{'a': 0.5447540011855364,
'f1': 0.1812366737739872,
'p': 0.10278113663845223,
'r': 0.7657657657657657},
{'a': 0.6852173913043478,
'f1': 0.40132304299889743,
'p': 0.27002967359050445,
'r': 0.7811158798283262},
{'a': 0.8125,
'f1': 0.7840571088637717,
'p': 0.7412823397075365,
'r': 0.8320707070707071},
{'a': 0.8658227848101265,
'f1': 0.8361669242658423,
'p': 0.7717546362339515,
'r': 0.9123102866779089}],
'www.forbes.com;2015': [{'a': 0.5838621940163191,
'f1': 0.5894454382826476,
'p': 0.43991989319092123,
'r': 0.8929539295392954},
{'a': 0.5193370165745856,
'f1': 0.4320474777448072,
'p': 0.29811629811629814,
'r': 0.7844827586206896},
{'a': 0.5268065268065268,
'f1': 0.4183381088825215,
'p': 0.28627450980392155,
'r': 0.776595744680851},
{'a': 0.5643808584114455,
'f1': 0.483927527761543,
'p': 0.3481917577796468,
'r': 0.7931034482758621},
{'a': 0.5219842164599775,
'f1': 0.4875100725221596,
'p': 0.34630795649685175,
'r': 0.8231292517006803},
{'a': 0.5499576629974597,
'f1': 0.5466950959488273,
'p': 0.39373464373464373,
'r': 0.8940027894002789},
{'a': 0.6127049180327869,
'f1': 0.6391752577319587,
'p': 0.4952662721893491,
'r': 0.9009687836383208},
{'a': 0.5576354679802956,
'f1': 0.5426146010186758,
'p': 0.401305876443998,
'r': 0.8375262054507338},
{'a': 0.45316681534344333,
'f1': 0.40485436893203886,
'p': 0.2630914826498423,
'r': 0.8778947368421053},
{'a': 0.39160437032777456,
'f1': 0.2538787023977433,
'p': 0.15254237288135594,
'r': 0.7563025210084033},
{'a': 0.46898121798520204,
'f1': 0.2969103240391861,
'p': 0.18532455315145813,
'r': 0.7462121212121212},
{'a': 0.6246428571428572,
'f1': 0.6295382446246035,
'p': 0.5129236071223435,
'r': 0.8147810218978102},
{'a': 0.5793151642208246,
'f1': 0.6175349428208385,
'p': 0.4807121661721068,
'r': 0.8632326820603907},
{'a': 0.5854922279792746,
'f1': 0.6030534351145038,
'p': 0.4606413994169096,
'r': 0.8729281767955801},
{'a': 0.5060922541340296,
'f1': 0.4908030506953791,
'p': 0.3372379778051788,
'r': 0.9011532125205931},
{'a': 0.5326244562590623,
'f1': 0.473024523160763,
'p': 0.326561324303988,
'r': 0.857707509881423},
{'a': 0.553169014084507,
'f1': 0.4775627830382874,
'p': 0.3473053892215569,
'r': 0.764163372859025},
{'a': 0.48580786026200873,
'f1': 0.33941093969144465,
'p': 0.2186088527551942,
'r': 0.7586206896551724},
{'a': 0.5030250648228176,
'f1': 0.469067405355494,
'p': 0.3227445997458704,
'r': 0.8581081081081081},
{'a': 0.6147635524798154,
'f1': 0.6152073732718895,
'p': 0.485160508782556,
'r': 0.8405036726128017},
{'a': 0.42690626517727054,
'f1': 0.2503176620076239,
'p': 0.15247678018575853,
'r': 0.6985815602836879},
{'a': 0.5508317929759704,
'f1': 0.5159362549800797,
'p': 0.37590711175616837,
'r': 0.8222222222222222},
{'a': 0.3676056338028169,
'f1': 0.09292929292929293,
'p': 0.050254916241806266,
'r': 0.6160714285714286},
{'a': 0.6184708554125662,
'f1': 0.624441132637854,
'p': 0.48862973760932943,
'r': 0.8648090815273478},
{'a': 0.5408256880733945,
'f1': 0.4767381076842656,
'p': 0.3568075117370892,
'r': 0.7181102362204724}],
'www.foxnews.com;2000': [{'a': 0.9484320557491289,
'f1': 0.9127358490566038,
'p': 0.9280575539568345,
'r': 0.8979118329466357},
{'a': 0.9435261707988981,
'f1': 0.8379446640316206,
'p': 0.8346456692913385,
'r': 0.8412698412698413},
{'a': 0.9193758127438232,
'f1': 0.8739837398373984,
'p': 0.9684684684684685,
'r': 0.7962962962962963},
{'a': 0.8579710144927536,
'f1': 0.8161350844277673,
'p': 0.9119496855345912,
'r': 0.7385398981324278},
{'a': 0.9368770764119602,
'f1': 0.8592592592592593,
'p': 0.8405797101449275,
'r': 0.8787878787878788},
{'a': 0.848780487804878,
'f1': 0.8660117878192535,
'p': 0.9666666666666667,
'r': 0.7843416370106762},
{'a': 0.9016309887869521,
'f1': 0.8800497203231822,
'p': 0.9698630136986301,
'r': 0.8054607508532423},
{'a': 0.9488286066584464,
'f1': 0.928138528138528,
'p': 0.9469964664310954,
'r': 0.9100169779286927},
{'a': 0.8949804200783197,
'f1': 0.9047465288989345,
'p': 0.9831578947368421,
'r': 0.8379186602870813},
{'a': 0.9185867895545314,
'f1': 0.8359133126934984,
'p': 0.9712230215827338,
'r': 0.7336956521739131},
{'a': 0.9461474730737366,
'f1': 0.8577680525164113,
'p': 0.8235294117647058,
'r': 0.8949771689497716},
{'a': 0.8683333333333333,
'f1': 0.801673640167364,
'p': 0.8050420168067227,
'r': 0.7983333333333333},
{'a': 0.944947209653092,
'f1': 0.8931185944363104,
'p': 0.8866279069767442,
'r': 0.8997050147492626},
{'a': 0.9563994374120957,
'f1': 0.9285714285714286,
'p': 0.9221967963386728,
'r': 0.9350348027842227},
{'a': 0.2631578947368421,
'f1': 0.125,
'p': 0.07692307692307693,
'r': 0.3333333333333333},
{'a': 0.8845286423094272,
'f1': 0.8573021181716833,
'p': 0.9808673469387755,
'r': 0.7613861386138614},
{'a': 0.9117736537876483,
'f1': 0.8980309423347398,
'p': 0.9666919000757003,
'r': 0.8384766907419566},
{'a': 0.8986225895316804,
'f1': 0.8729281767955801,
'p': 0.9503759398496241,
'r': 0.8071519795657727},
{'a': 0.9511173184357542,
'f1': 0.9215246636771302,
'p': 0.9174107142857143,
'r': 0.9256756756756757},
{'a': 0.08677800974251913,
'f1': 0.04358282923985132,
'p': 0.9614147909967846,
'r': 0.022296793437733036},
{'a': 0.930672268907563,
'f1': 0.9226260257913247,
'p': 0.9849812265331664,
'r': 0.8676957001102535},
{'a': 0.909468438538206,
'f1': 0.9269926322839919,
'p': 0.9129287598944591,
'r': 0.9414965986394558},
{'a': 0.958092485549133,
'f1': 0.9683060109289617,
'p': 0.9844444444444445,
'r': 0.9526881720430107},
{'a': 0.915542938254081,
'f1': 0.8402684563758389,
'p': 0.8968481375358166,
'r': 0.7904040404040404},
{'a': 0.9453253143794423,
'f1': 0.9264705882352942,
'p': 0.9559939301972686,
'r': 0.8987161198288159}],
'www.foxnews.com;2005': [{'a': 0.5258455647734525,
'f1': 0.5697741748697163,
'p': 0.45703669298653044,
'r': 0.7563412759415834},
{'a': 0.4402370760619032,
'f1': 0.451966473243069,
'p': 0.29842486164325244,
'r': 0.9309428950863213},
{'a': 0.45121951219512196,
'f1': 0.47752307234295926,
'p': 0.3274806043282973,
'r': 0.8813186813186813},
{'a': 0.463758819756254,
'f1': 0.49179331306990887,
'p': 0.34120624209194433,
'r': 0.8803046789989118},
{'a': 0.4124113475177305,
'f1': 0.38515769944341377,
'p': 0.24515824279641002,
'r': 0.8979238754325259},
{'a': 0.35640535372848947,
'f1': 0.24832514515408663,
'p': 0.14623882167280378,
'r': 0.8224852071005917},
{'a': 0.5404807084123973,
'f1': 0.591049817056009,
'p': 0.4396984924623116,
'r': 0.9012875536480687},
{'a': 0.5967452300785634,
'f1': 0.364408278790023,
'p': 0.2362927276898371,
'r': 0.7959814528593508},
{'a': 0.5875885658441241,
'f1': 0.6924758608125341,
'p': 0.6880883417813178,
'r': 0.6969196919691969},
{'a': 0.5226945244956772,
'f1': 0.30152872957301,
'p': 0.1813570069752695,
'r': 0.89375},
{'a': 0.5320322443784472,
'f1': 0.5359697097181321,
'p': 0.3778173190984579,
'r': 0.9218523878437048},
{'a': 0.45454545454545453,
'f1': 0.3455797933409874,
'p': 0.21469329529243938,
'r': 0.8852941176470588},
{'a': 0.46316680779000846,
'f1': 0.40858208955223874,
'p': 0.26674786845310594,
'r': 0.8725099601593626},
{'a': 0.3981643356643357,
'f1': 0.2934838378655721,
'p': 0.17556783302639656,
'r': 0.89375},
{'a': 0.46656,
'f1': 0.4959177502267917,
'p': 0.34309623430962344,
'r': 0.8942202835332607},
{'a': 0.45215157353885677,
'f1': 0.477328431372549,
'p': 0.3224337748344371,
'r': 0.9186320754716981},
{'a': 0.45156200694225307,
'f1': 0.4707673568818514,
'p': 0.3246535069298614,
'r': 0.8560354374307863},
{'a': 0.6322568242983468,
'f1': 0.6809007506255212,
'p': 0.5783508075942193,
'r': 0.8276561232765612},
{'a': 0.3949843260188088,
'f1': 0.32340052585451357,
'p': 0.1956521739130435,
'r': 0.9318181818181818},
{'a': 0.4062968515742129,
'f1': 0.41867293012331186,
'p': 0.2700757575757576,
'r': 0.9308093994778068},
{'a': 0.6565656565656566,
'f1': 0.6792452830188679,
'p': 0.5594949004371054,
'r': 0.8642160540135033},
{'a': 0.4397839211947887,
'f1': 0.4478546821171312,
'p': 0.31046461137646547,
'r': 0.8033707865168539},
{'a': 0.39493518239372927,
'f1': 0.38866890039597923,
'p': 0.2519747235387046,
'r': 0.8495339547270306},
{'a': 0.5080327371930888,
'f1': 0.4910630291627469,
'p': 0.34222027972027974,
'r': 0.8690344062153164},
{'a': 0.4653337633021606,
'f1': 0.49451219512195127,
'p': 0.33512396694214874,
'r': 0.9430232558139535}],
'www.foxnews.com;2010': [{'a': 0.7679533213644524,
'f1': 0.3561643835616438,
'p': 0.23714759535655058,
'r': 0.715},
{'a': 0.7654424040066778,
'f1': 0.6661386138613862,
'p': 0.5914205344585092,
'r': 0.7624660018132366},
{'a': 0.7062084257206208,
'f1': 0.33316557624559634,
'p': 0.2278045423262216,
'r': 0.6198501872659176},
{'a': 0.7676311030741411,
'f1': 0.32189973614775724,
'p': 0.20854700854700856,
'r': 0.7052023121387283},
{'a': 0.7498181818181818,
'f1': 0.5628970775095299,
'p': 0.4334637964774951,
'r': 0.802536231884058},
{'a': 0.7705615428247305,
'f1': 0.6616478460895022,
'p': 0.5854922279792746,
'r': 0.760576923076923},
{'a': 0.7634275069854083,
'f1': 0.5665529010238908,
'p': 0.45814167433302666,
'r': 0.7421758569299552},
{'a': 0.7614650661742074,
'f1': 0.5948771562990067,
'p': 0.5035398230088496,
'r': 0.7266922094508301},
{'a': 0.7199854914762422,
'f1': 0.513853904282116,
'p': 0.37158469945355194,
'r': 0.8326530612244898},
{'a': 0.7697312588401697,
'f1': 0.6454703832752613,
'p': 0.5571428571428572,
'r': 0.7670807453416149},
{'a': 0.7473504273504273,
'f1': 0.41115537848605577,
'p': 0.31197097944377267,
'r': 0.602803738317757},
{'a': 0.7613836921990822,
'f1': 0.424190800681431,
'p': 0.3070283600493218,
'r': 0.6859504132231405},
{'a': 0.751170046801872,
'f1': 0.4796084828711256,
'p': 0.3555018137847642,
'r': 0.7368421052631579},
{'a': 0.8027233477250083,
'f1': 0.7522935779816513,
'p': 0.6424501424501424,
'r': 0.9074446680080482},
{'a': 0.7661708751297129,
'f1': 0.6547497446373851,
'p': 0.5288778877887789,
'r': 0.8592493297587132},
{'a': 0.7612149532710281,
'f1': 0.328515111695138,
'p': 0.21367521367521367,
'r': 0.7102272727272727},
{'a': 0.7581660173808811,
'f1': 0.585089974293059,
'p': 0.4982486865148862,
'r': 0.7085927770859277},
{'a': 0.8140133951571354,
'f1': 0.8139175257731959,
'p': 0.7344186046511628,
'r': 0.9127167630057803},
{'a': 0.7687253613666228,
'f1': 0.38028169014084506,
'p': 0.2583732057416268,
'r': 0.72},
{'a': 0.7768673179955877,
'f1': 0.71007371007371,
'p': 0.598343685300207,
'r': 0.8731117824773413},
{'a': 0.7780074410913601,
'f1': 0.5251989389920424,
'p': 0.3907894736842105,
'r': 0.8005390835579514},
{'a': 0.7514068189341278,
'f1': 0.48099516240497586,
'p': 0.37540453074433655,
'r': 0.6692307692307692},
{'a': 0.7881219903691814,
'f1': 0.7667844522968198,
'p': 0.7011308562197092,
'r': 0.8460038986354775},
{'a': 0.7661401098901099,
'f1': 0.44947453516572355,
'p': 0.33016627078384797,
'r': 0.7037974683544304},
{'a': 0.788235294117647,
'f1': 0.7470133520730851,
'p': 0.6537515375153752,
'r': 0.8713114754098361}],
'www.foxnews.com;2015': [{'a': 0.8527794746487477,
'f1': 0.8832929782082324,
'p': 0.8652751423149905,
'r': 0.9020771513353115},
{'a': 0.8136067101584343,
'f1': 0.7304582210242587,
'p': 0.7188328912466844,
'r': 0.7424657534246575},
{'a': 0.8357933579335793,
'f1': 0.5505050505050505,
'p': 0.615819209039548,
'r': 0.4977168949771689},
{'a': 0.8047337278106509,
'f1': 0.7009063444108762,
'p': 0.6253369272237197,
'r': 0.7972508591065293},
{'a': 0.8609422492401215,
'f1': 0.8042780748663102,
'p': 0.7752577319587629,
'r': 0.8355555555555556},
{'a': 0.8256227758007118,
'f1': 0.6726057906458798,
'p': 0.601593625498008,
'r': 0.7626262626262627},
{'a': 0.8326315789473684,
'f1': 0.7480190174326465,
'p': 0.6920821114369502,
'r': 0.8137931034482758},
{'a': 0.857025472473295,
'f1': 0.7410714285714285,
'p': 0.780564263322884,
'r': 0.7053824362606232},
{'a': 0.879874213836478,
'f1': 0.8848704038577455,
'p': 0.8940316686967114,
'r': 0.8758949880668258},
{'a': 0.825813221406086,
'f1': 0.2169811320754717,
'p': 0.24731182795698925,
'r': 0.19327731092436976},
{'a': 0.85580204778157,
'f1': 0.8094701240135287,
'p': 0.8140589569160998,
'r': 0.804932735426009},
{'a': 0.8389021479713604,
'f1': 0.7684391080617496,
'p': 0.7088607594936709,
'r': 0.8389513108614233},
{'a': 0.8478260869565217,
'f1': 0.8287910552061496,
'p': 0.8079019073569482,
'r': 0.8507890961262554},
{'a': 0.8489326765188834,
'f1': 0.8357142857142857,
'p': 0.8096885813148789,
'r': 0.8634686346863468},
{'a': 0.8705216844751729,
'f1': 0.8946830265848671,
'p': 0.8955987717502559,
'r': 0.8937691521961185},
{'a': 0.8731343283582089,
'f1': 0.896969696969697,
'p': 0.9150865622423743,
'r': 0.8795562599049128},
{'a': 0.7581274382314694,
'f1': 0.6008583690987124,
'p': 0.5072463768115942,
'r': 0.7368421052631579},
{'a': 0.8699234844025897,
'f1': 0.9002257336343116,
'p': 0.8815207780725022,
'r': 0.9197416974169742},
{'a': 0.8249110320284697,
'f1': 0.7976973684210527,
'p': 0.8391003460207612,
'r': 0.7601880877742947},
{'a': 0.8477801268498943,
'f1': 0.8665018541409146,
'p': 0.844578313253012,
'r': 0.8895939086294417},
{'a': 0.8320959451741862,
'f1': 0.8278688524590164,
'p': 0.8600973236009732,
'r': 0.7979683972911964},
{'a': 0.8352842809364549,
'f1': 0.8167441860465117,
'p': 0.8205607476635514,
'r': 0.812962962962963},
{'a': 0.8672086720867209,
'f1': 0.8122605363984674,
'p': 0.8153846153846154,
'r': 0.8091603053435115},
{'a': 0.8805570433851098,
'f1': 0.8855823499230375,
'p': 0.916135881104034,
'r': 0.8570009930486594},
{'a': 0.7781094527363184,
'f1': 0.6836879432624113,
'p': 0.5863746958637469,
'r': 0.8197278911564626},
{'a': 0.8728461081402258,
'f1': 0.8685503685503685,
'p': 0.8653610771113831,
'r': 0.8717632552404438}],
'www.latimes.com;2000': [{'a': 0.875,
'f1': 0.8980952380952381,
'p': 0.8691244239631336,
'r': 0.929064039408867},
{'a': 0.9025906735751296,
'f1': 0.926791277258567,
'p': 0.9001512859304085,
'r': 0.9550561797752809},
{'a': 0.8231009365244537,
'f1': 0.724025974025974,
'p': 0.6229050279329609,
'r': 0.8643410852713178},
{'a': 0.8624229979466119,
'f1': 0.8973337419552558,
'p': 0.8536443148688047,
'r': 0.9457364341085271},
{'a': 0.897427330437688,
'f1': 0.9322146169132259,
'p': 0.906786941580756,
'r': 0.9591094956837801},
{'a': 0.8704301075268818,
'f1': 0.8928412627834593,
'p': 0.8647717484926787,
'r': 0.9227941176470589},
{'a': 0.8592708741669933,
'f1': 0.884305510796004,
'p': 0.8553615960099751,
'r': 0.9152768512341561},
{'a': 0.889520202020202,
'f1': 0.8816768086544963,
'p': 0.8562048588312541,
'r': 0.9087108013937282},
{'a': 0.8969451601030548,
'f1': 0.9045671438309474,
'p': 0.8966216216216216,
'r': 0.9126547455295736},
{'a': 0.8384236453201971,
'f1': 0.7703081232492996,
'p': 0.792507204610951,
'r': 0.7493188010899182},
{'a': 0.9483818107333061,
'f1': 0.9275028768699655,
'p': 0.9664268585131894,
'r': 0.8915929203539823},
{'a': 0.8772741288930003,
'f1': 0.9196933010492332,
'p': 0.8864255153636718,
'r': 0.9555555555555556},
{'a': 0.8324439701173959,
'f1': 0.8448616600790514,
'p': 0.8142857142857143,
'r': 0.87782340862423},
{'a': 0.8774295914319714,
'f1': 0.9125884016973126,
'p': 0.8862637362637362,
'r': 0.9405247813411078},
{'a': 0.874572962420693,
'f1': 0.9050609530845954,
'p': 0.8706467661691543,
'r': 0.9423076923076923},
{'a': 0.8776134625191229,
'f1': 0.906832298136646,
'p': 0.8815094339622641,
'r': 0.9336530775379697},
{'a': 0.862573922838637,
'f1': 0.9110787172011663,
'p': 0.8719916288803627,
'r': 0.9538344143456696},
{'a': 0.8820960698689956,
'f1': 0.9042244437278297,
'p': 0.8795483061480552,
'r': 0.9303251493032515},
{'a': 0.8241134751773049,
'f1': 0.8759379689844923,
'p': 0.865546218487395,
'r': 0.8865822784810127},
{'a': 0.8782712133227597,
'f1': 0.9131050099065949,
'p': 0.8862637362637362,
'r': 0.9416228838295388},
{'a': 0.8637316561844863,
'f1': 0.9010654490106546,
'p': 0.8799048751486326,
'r': 0.9232688708671242},
{'a': 0.8521951219512195,
'f1': 0.885270730783794,
'p': 0.829666430092264,
'r': 0.9488636363636364},
{'a': 0.8582278481012658,
'f1': 0.8309859154929577,
'p': 0.8113948919449901,
'r': 0.8515463917525773},
{'a': 0.8618481244281794,
'f1': 0.8966461327857632,
'p': 0.8704318936877077,
'r': 0.9244883556810163},
{'a': 0.8911947820930922,
'f1': 0.9291368990152539,
'p': 0.9017991004497751,
'r': 0.9581839904420549}],
'www.latimes.com;2005': [{'a': 0.770698766881973,
'f1': 0.7460162601626016,
'p': 0.6584385763490241,
'r': 0.8604651162790697},
{'a': 0.6036253776435045,
'f1': 0.4858934169278998,
'p': 0.34444444444444444,
'r': 0.824468085106383},
{'a': 0.7529616724738676,
'f1': 0.7269926838659991,
'p': 0.6113989637305699,
'r': 0.8964862298195632},
{'a': 0.7162111568700861,
'f1': 0.657942238267148,
'p': 0.5531107738998483,
'r': 0.811804008908686},
{'a': 0.7800687285223368,
'f1': 0.7188872620790628,
'p': 0.6002444987775061,
'r': 0.8959854014598541},
{'a': 0.865979381443299,
'f1': 0.896237172177879,
'p': 0.9225352112676056,
'r': 0.8713968957871396},
{'a': 0.8629402756508423,
'f1': 0.7629139072847683,
'p': 0.7093596059113301,
'r': 0.8252148997134671},
{'a': 0.7015663643858203,
'f1': 0.7145110410094637,
'p': 0.6007957559681698,
'r': 0.8813229571984436},
{'a': 0.774174869716271,
'f1': 0.7986577181208054,
'p': 0.7245901639344262,
'r': 0.8895917193789534},
{'a': 0.7467752885268161,
'f1': 0.7988133764832794,
'p': 0.7116770783277271,
'r': 0.9102642901044868},
{'a': 0.7273056057866184,
'f1': 0.7370990237099024,
'p': 0.6213991769547325,
'r': 0.9057412167952014},
{'a': 0.7049847405900305,
'f1': 0.7615785146615512,
'p': 0.8307922272047833,
'r': 0.7030103718694662},
{'a': 0.7539511494252874,
'f1': 0.7123057538849223,
'p': 0.5868512110726644,
'r': 0.905982905982906},
{'a': 0.7423595136378573,
'f1': 0.7165582067968185,
'p': 0.6232704402515723,
'r': 0.842687074829932},
{'a': 0.7527675276752768,
'f1': 0.735873850197109,
'p': 0.6557377049180327,
'r': 0.8383233532934131},
{'a': 0.7761194029850746,
'f1': 0.725130890052356,
'p': 0.621773288439955,
'r': 0.869701726844584},
{'a': 0.8142354321470473,
'f1': 0.8355832467982001,
'p': 0.7712460063897764,
'r': 0.9116314199395771},
{'a': 0.7665995975855131,
'f1': 0.7251184834123223,
'p': 0.6232179226069247,
'r': 0.8668555240793201},
{'a': 0.690768531150523,
'f1': 0.6966993755575379,
'p': 0.5614665708123652,
'r': 0.917743830787309},
{'a': 0.6255002858776444,
'f1': 0.3767840152235965,
'p': 0.25158831003811943,
'r': 0.75},
{'a': 0.6696554978763568,
'f1': 0.6744186046511629,
'p': 0.5488266464799394,
'r': 0.8745476477683957},
{'a': 0.7975929978118161,
'f1': 0.8484227775501845,
'p': 0.7756554307116105,
'r': 0.9362567811934901},
{'a': 0.8925913943707539,
'f1': 0.9098805646036917,
'p': 0.9259668508287293,
'r': 0.8943436499466382},
{'a': 0.7356948228882834,
'f1': 0.7526411657559199,
'p': 0.6356923076923077,
'r': 0.9223214285714286},
{'a': 0.6185446009389671,
'f1': 0.3538767395626243,
'p': 0.23116883116883116,
'r': 0.7542372881355932}],
'www.latimes.com;2010': [{'a': 0.6836406204272754,
'f1': 0.5538588526619893,
'p': 0.44115713346482577,
'r': 0.7439024390243902},
{'a': 0.658493870402802,
'f1': 0.4964957580228697,
'p': 0.38325740318906604,
'r': 0.7047120418848167},
{'a': 0.6348812477844736,
'f1': 0.3481012658227848,
'p': 0.23768366464995677,
'r': 0.6501182033096927},
{'a': 0.6901241868716735,
'f1': 0.46639511201629336,
'p': 0.3609141055949567,
'r': 0.6589928057553956},
{'a': 0.8424110384894699,
'f1': 0.8498269896193771,
'p': 0.8365122615803815,
'r': 0.8635724331926864},
{'a': 0.8349782866166601,
'f1': 0.8288288288288287,
'p': 0.812199036918138,
'r': 0.8461538461538461},
{'a': 0.6858288770053476,
'f1': 0.5404771216269065,
'p': 0.43241551939924905,
'r': 0.7205422314911366},
{'a': 0.720886246960281,
'f1': 0.48987654320987656,
'p': 0.3609898107714702,
'r': 0.7619047619047619},
{'a': 0.8373468525559781,
'f1': 0.8216767021769338,
'p': 0.7884444444444444,
'r': 0.8578336557059961},
{'a': 0.8170064644455495,
'f1': 0.7503392130257801,
'p': 0.6991150442477876,
'r': 0.8096632503660323},
{'a': 0.6788732394366197,
'f1': 0.1782437745740498,
'p': 0.10373760488176964,
'r': 0.6325581395348837},
{'a': 0.8153846153846154,
'f1': 0.7900874635568512,
'p': 0.7735490009514748,
'r': 0.8073485600794439},
{'a': 0.6942989214175654,
'f1': 0.5713050993949871,
'p': 0.4508867667121419,
'r': 0.7794811320754716},
{'a': 0.7271190533687515,
'f1': 0.6026722925457103,
'p': 0.5228798047589994,
'r': 0.7112033195020747},
{'a': 0.7435096153846154,
'f1': 0.49598488427019366,
'p': 0.3766140602582496,
'r': 0.7261410788381742},
{'a': 0.6622646810901939,
'f1': 0.319365798414496,
'p': 0.20449601160261058,
'r': 0.7286821705426356},
{'a': 0.7489436619718309,
'f1': 0.5467260012714558,
'p': 0.40681173131504256,
'r': 0.8333333333333334},
{'a': 0.6941079295154186,
'f1': 0.5695466873304921,
'p': 0.4738878143133462,
'r': 0.7135922330097088},
{'a': 0.6500125533517449,
'f1': 0.4468253968253968,
'p': 0.3246828143021915,
'r': 0.7162849872773537},
{'a': 0.6840628507295174,
'f1': 0.5216652506372133,
'p': 0.4120805369127517,
'r': 0.7106481481481481},
{'a': 0.8341806804849433,
'f1': 0.8302642113690952,
'p': 0.8133333333333334,
'r': 0.8479149632052331},
{'a': 0.6771653543307087,
'f1': 0.5138339920948617,
'p': 0.3968792401628223,
'r': 0.7285180572851806},
{'a': 0.6850368471643704,
'f1': 0.5193154034229828,
'p': 0.4078341013824885,
'r': 0.7146702557200538},
{'a': 0.8372620126926564,
'f1': 0.8066774367259021,
'p': 0.7573306370070778,
'r': 0.8629032258064516},
{'a': 0.7132199343479558,
'f1': 0.25446082234290146,
'p': 0.15984405458089668,
'r': 0.623574144486692}],
'www.latimes.com;2015': [{'a': 0.6666200531394211,
'f1': 0.16991643454039,
'p': 0.10107705053852527,
'r': 0.5327510917030568},
{'a': 0.6521987435751,
'f1': 0.19069767441860466,
'p': 0.11232876712328767,
'r': 0.6307692307692307},
{'a': 0.6487778232964632,
'f1': 0.20470006184291897,
'p': 0.12291125139249907,
'r': 0.6118299445471349},
{'a': 0.4315866084425036,
'f1': 0.3835832675611681,
'p': 0.2607296137339056,
'r': 0.7253731343283583},
{'a': 0.36031984007996004,
'f1': 0.23030667468430543,
'p': 0.14248511904761904,
'r': 0.6003134796238244},
{'a': 0.6569448455096737,
'f1': 0.1264705882352941,
'p': 0.07208717518860017,
'r': 0.5149700598802395},
{'a': 0.41148215919487646,
'f1': 0.3998133893165384,
'p': 0.2756513348343519,
'r': 0.7275042444821732},
{'a': 0.6690708527789563,
'f1': 0.19254658385093165,
'p': 0.11462612982744454,
'r': 0.6012931034482759},
{'a': 0.6558361774744027,
'f1': 0.21488632824665216,
'p': 0.12931034482758622,
'r': 0.6353591160220995},
{'a': 0.6665799062988027,
'f1': 0.29731212287438286,
'p': 0.19730615216599928,
'r': 0.6028921023359288},
{'a': 0.6433507262727401,
'f1': 0.10730674196964349,
'p': 0.060198019801980196,
'r': 0.4935064935064935},
{'a': 0.659353023909986,
'f1': 0.12626262626262627,
'p': 0.07237386269644334,
'r': 0.4943502824858757},
{'a': 0.3383720930232558,
'f1': 0.18888096935138987,
'p': 0.11214557765552265,
'r': 0.5981941309255079},
{'a': 0.6471083660864683,
'f1': 0.17789404839764555,
'p': 0.10457516339869281,
'r': 0.5951859956236324},
{'a': 0.6449492784683526,
'f1': 0.17304492512479197,
'p': 0.0995405819295559,
'r': 0.6615776081424937},
{'a': 0.6676242459063487,
'f1': 0.15915697674418602,
'p': 0.09182389937106918,
'r': 0.5967302452316077},
{'a': 0.6518781006378455,
'f1': 0.2010409889394925,
'p': 0.11852704257767549,
'r': 0.6616702355460385},
{'a': 0.6617606602475928,
'f1': 0.21362328110009596,
'p': 0.13072407045009785,
'r': 0.583916083916084},
{'a': 0.6651351351351351,
'f1': 0.24266503667481662,
'p': 0.15135341212352268,
'r': 0.6117103235747303},
{'a': 0.2809569996766893,
'f1': 0.1701492537313433,
'p': 0.09917355371900827,
'r': 0.5984251968503937},
{'a': 0.669045619927232,
'f1': 0.24609499521836148,
'p': 0.15048732943469786,
'r': 0.6748251748251748},
{'a': 0.6481167731004822,
'f1': 0.33002481389578164,
'p': 0.21218889597957882,
'r': 0.7421875},
{'a': 0.37010479867622725,
'f1': 0.26464906632324536,
'p': 0.1683046683046683,
'r': 0.6189759036144579},
{'a': 0.3645077720207254,
'f1': 0.18694066953927743,
'p': 0.1125748502994012,
'r': 0.55078125},
{'a': 0.37014596529881577,
'f1': 0.2111072783718524,
'p': 0.1281943862589024,
'r': 0.59765625}],
'www.nymag.com;2000': [{'a': 0.9247666117517848,
'f1': 0.9227298364354202,
'p': 0.9612220916568742,
'r': 0.8872017353579176},
{'a': 0.9481316229782487,
'f1': 0.9468267581475128,
'p': 0.9616724738675958,
'r': 0.9324324324324325},
{'a': 0.8897451096621221,
'f1': 0.8929804372842348,
'p': 0.9592088998763906,
'r': 0.8353067814854682},
{'a': 0.939297124600639,
'f1': 0.9216494845360824,
'p': 0.93125,
'r': 0.9122448979591836},
{'a': 0.9404255319148936,
'f1': 0.851851851851852,
'p': 0.8341968911917098,
'r': 0.8702702702702703},
{'a': 0.936183395291202,
'f1': 0.9394473838918282,
'p': 0.9603365384615384,
'r': 0.9194476409666283},
{'a': 0.9458804523424879,
'f1': 0.9604252805670408,
'p': 0.9748201438848921,
'r': 0.9464493597206054},
{'a': 0.9463532248342374,
'f1': 0.9495750708215298,
'p': 0.9610091743119266,
'r': 0.9384098544232923},
{'a': 0.9253380364491476,
'f1': 0.929639889196676,
'p': 0.9632606199770379,
'r': 0.8982869379014989},
{'a': 0.579172610556348,
'f1': 0.4337811900191939,
'p': 0.31741573033707865,
'r': 0.6848484848484848},
{'a': 0.9338645418326693,
'f1': 0.9129066107030431,
'p': 0.9314775160599572,
'r': 0.8950617283950617},
{'a': 0.6525641025641026,
'f1': 0.5054744525547445,
'p': 0.36209150326797385,
'r': 0.8368580060422961},
{'a': 0.8954723309111235,
'f1': 0.9032591826176928,
'p': 0.9646408839779006,
'r': 0.8492217898832685},
{'a': 0.9334011184544992,
'f1': 0.9453483521068002,
'p': 0.9716981132075472,
'r': 0.9203899268887084},
{'a': 0.9314917127071823,
'f1': 0.9382470119521912,
'p': 0.9661538461538461,
'r': 0.9119070667957405},
{'a': 0.8896408120770432,
'f1': 0.9014869888475836,
'p': 0.967098703888335,
'r': 0.8442123585726719},
{'a': 0.66008316008316,
'f1': 0.5818414322250639,
'p': 0.45363908275174475,
'r': 0.8110516934046346},
{'a': 0.9418604651162791,
'f1': 0.9029754204398449,
'p': 0.9136125654450262,
'r': 0.8925831202046036},
{'a': 0.9521424596549806,
'f1': 0.9515765765765766,
'p': 0.9624145785876993,
'r': 0.9409799554565702},
{'a': 0.9425454545454546,
'f1': 0.9227761485826003,
'p': 0.9309664694280079,
'r': 0.9147286821705426},
{'a': 0.9206989247311828,
'f1': 0.9141193595342069,
'p': 0.9486404833836858,
'r': 0.8820224719101124},
{'a': 0.9317102137767221,
'f1': 0.9247874427730544,
'p': 0.9541160593792173,
'r': 0.8972081218274112},
{'a': 0.9338970023059185,
'f1': 0.9185606060606061,
'p': 0.9381044487427466,
'r': 0.8998144712430427},
{'a': 0.9357366771159875,
'f1': 0.8903743315508023,
'p': 0.9123287671232877,
'r': 0.8694516971279374},
{'a': 0.9437321937321937,
'f1': 0.9401061410159212,
'p': 0.950920245398773,
'r': 0.9295352323838081}],
'www.nymag.com;2005': [{'a': 0.75,
'f1': 0.737049827331031,
'p': 0.6575704225352113,
'r': 0.8383838383838383},
{'a': 0.7653012048192771,
'f1': 0.7627861665854847,
'p': 0.6624365482233503,
'r': 0.8989667049368542},
{'a': 0.7651880424300868,
'f1': 0.7511497189575881,
'p': 0.6487202118270079,
'r': 0.8919902912621359},
{'a': 0.729296066252588,
'f1': 0.7105700055340344,
'p': 0.6233009708737864,
'r': 0.8262548262548263},
{'a': 0.7477899115964639,
'f1': 0.7365562194459532,
'p': 0.6289424860853432,
'r': 0.8885976408912188},
{'a': 0.7035856573705179,
'f1': 0.5571428571428572,
'p': 0.41785714285714287,
'r': 0.8357142857142857},
{'a': 0.7053206002728513,
'f1': 0.6516129032258065,
'p': 0.5153061224489796,
'r': 0.8859649122807017},
{'a': 0.7481662591687042,
'f1': 0.7426286856571714,
'p': 0.6711833785004516,
'r': 0.831096196868009},
{'a': 0.7631578947368421,
'f1': 0.7643979057591623,
'p': 0.6656534954407295,
'r': 0.8975409836065574},
{'a': 0.8038601982263954,
'f1': 0.8260869565217391,
'p': 0.7497900923593619,
'r': 0.9196704428424305},
{'a': 0.7766109785202864,
'f1': 0.7993138936535163,
'p': 0.7275565964090555,
'r': 0.8867745004757374},
{'a': 0.7740501212611156,
'f1': 0.8037908037908038,
'p': 0.783709787816564,
'r': 0.8249279538904899},
{'a': 0.7084548104956269,
'f1': 0.5741056218057922,
'p': 0.5332278481012658,
'r': 0.6217712177121771},
{'a': 0.736415633937083,
'f1': 0.7091004734350342,
'p': 0.6550048590864918,
'r': 0.7729357798165137},
{'a': 0.7607244995233555,
'f1': 0.7604961832061068,
'p': 0.6794543904518329,
'r': 0.8634886240520043},
{'a': 0.7643142476697736,
'f1': 0.7799419809365934,
'p': 0.7172256097560976,
'r': 0.8546775658492279},
{'a': 0.8083109919571045,
'f1': 0.8378071833648394,
'p': 0.7858156028368795,
'r': 0.897165991902834},
{'a': 0.7155172413793104,
'f1': 0.6890052356020943,
'p': 0.6308724832214765,
'r': 0.7589388696655133},
{'a': 0.660916931457104,
'f1': 0.6314750863344846,
'p': 0.6550665301944729,
'r': 0.6095238095238096},
{'a': 0.7599009900990099,
'f1': 0.7485743908761017,
'p': 0.645218945487042,
'r': 0.891358024691358},
{'a': 0.7795376712328768,
'f1': 0.7966837741808132,
'p': 0.7171286425017769,
'r': 0.8960923623445826},
{'a': 0.7631318136769079,
'f1': 0.7679611650485436,
'p': 0.6641477749790092,
'r': 0.9102416570771001},
{'a': 0.7451596023024595,
'f1': 0.7199539965497412,
'p': 0.624750499001996,
'r': 0.8493894165535957},
{'a': 0.6980392156862745,
'f1': 0.5659526493799324,
'p': 0.4239864864864865,
'r': 0.8508474576271187},
{'a': 0.7451997924234561,
'f1': 0.7218130311614731,
'p': 0.6288252714708786,
'r': 0.8470744680851063},
{'a': 0.7640163098878695,
'f1': 0.7772967772967774,
'p': 0.7100175746924429,
'r': 0.8586609989373007}],
'www.nymag.com;2010': [{'a': 0.6625199362041467,
'f1': 0.2802721088435374,
'p': 0.1736930860033727,
'r': 0.7253521126760564},
{'a': 0.6420281001832621,
'f1': 0.42605288932419194,
'p': 0.29733424470266573,
'r': 0.7512953367875648},
{'a': 0.5772020725388601,
'f1': 0.09198813056379822,
'p': 0.050243111831442464,
'r': 0.543859649122807},
{'a': 0.5812586445366529,
'f1': 0.12940330697340044,
'p': 0.07159904534606205,
'r': 0.6716417910447762},
{'a': 0.629375204448806,
'f1': 0.31540785498489426,
'p': 0.20295489891135304,
'r': 0.7073170731707317},
{'a': 0.5844327176781002,
'f1': 0.22126081582200247,
'p': 0.13056163384390956,
'r': 0.7246963562753036},
{'a': 0.6030725259021079,
'f1': 0.1486590038314176,
'p': 0.08326180257510729,
'r': 0.6928571428571428},
{'a': 0.6371932032724984,
'f1': 0.3750677506775068,
'p': 0.25090645395213923,
'r': 0.7424892703862661},
{'a': 0.5865102639296188,
'f1': 0.2113113735239279,
'p': 0.12481644640234948,
'r': 0.6882591093117408},
{'a': 0.5919842312746386,
'f1': 0.20689655172413793,
'p': 0.12062546537602382,
'r': 0.726457399103139},
{'a': 0.7080801260409633,
'f1': 0.653671562082777,
'p': 0.5370776656428258,
'r': 0.834924965893588},
{'a': 0.5949612403100775,
'f1': 0.23815309842041313,
'p': 0.14264919941775836,
'r': 0.7205882352941176},
{'a': 0.5879467414155571,
'f1': 0.1184407796101949,
'p': 0.06577851790174855,
'r': 0.5939849624060151},
{'a': 0.5753188555670459,
'f1': 0.040498442367601244,
'p': 0.021848739495798318,
'r': 0.2765957446808511},
{'a': 0.5907742998352553,
'f1': 0.23616236162361623,
'p': 0.14107274063188832,
'r': 0.7245283018867924},
{'a': 0.6504258943781942,
'f1': 0.25974025974025977,
'p': 0.15971606033717836,
'r': 0.694980694980695},
{'a': 0.6260661890139884,
'f1': 0.2624495289367429,
'p': 0.16062602965403625,
'r': 0.7169117647058824},
{'a': 0.6330698287220027,
'f1': 0.3199023199023199,
'p': 0.20373250388802489,
'r': 0.7443181818181818},
{'a': 0.7475490196078431,
'f1': 0.627390180878553,
'p': 0.531523642732049,
'r': 0.7654476670870114},
{'a': 0.6569390402075227,
'f1': 0.18111455108359134,
'p': 0.10540540540540541,
'r': 0.6428571428571429},
{'a': 0.6629318394024276,
'f1': 0.4391506991196272,
'p': 0.30881281864530225,
'r': 0.7598566308243727},
{'a': 0.5875888817065288,
'f1': 0.234093637454982,
'p': 0.14264813460131676,
'r': 0.6521739130434783},
{'a': 0.6270661157024794,
'f1': 0.253618194348725,
'p': 0.15397489539748954,
'r': 0.71875},
{'a': 0.5771349862258953,
'f1': 0.1327683615819209,
'p': 0.07442596991290577,
'r': 0.6143790849673203},
{'a': 0.6869346733668342,
'f1': 0.6803488968701898,
'p': 0.5676369863013698,
'r': 0.8489116517285531}],
'www.nymag.com;2015': [{'a': 0.15928099137364804,
'f1': 0.0019234268188079956,
'p': 0.0009650243864270624,
'r': 0.2803030303030303},
{'a': 0.16219166120934295,
'f1': 0.007499353504008275,
'p': 0.0037727994171675382,
'r': 0.6118143459915611},
{'a': 0.1668035266660904,
'f1': 0.024540187719786475,
'p': 0.012497423211708925,
'r': 0.674547983310153},
{'a': 0.21537045559134838,
'f1': 0.05987480133631734,
'p': 0.031244710741004027,
'r': 0.7155038759689922},
{'a': 0.2806415516598284,
'f1': 0.23921104536489152,
'p': 0.14197415246300804,
'r': 0.7591387080620932},
{'a': 0.22193911404835842,
'f1': 0.0825587581760711,
'p': 0.043829334751113176,
'r': 0.7095212479827865},
{'a': 0.15916909525165984,
'f1': 0.002028555824295857,
'p': 0.0010181704260651629,
'r': 0.2653061224489796},
{'a': 0.1614406410591412,
'f1': 0.003261207164302723,
'p': 0.001642721180673255,
'r': 0.22105263157894736},
{'a': 0.15997025436888956,
'f1': 0.00631290264158754,
'p': 0.0031759254438485968,
'r': 0.5147679324894515},
{'a': 0.16043681883835953,
'f1': 0.006165643377114583,
'p': 0.0030975870057526615,
'r': 0.6467391304347826},
{'a': 0.16455971154263868,
'f1': 0.018125191463290106,
'p': 0.009184043048584882,
'r': 0.6853281853281853},
{'a': 0.19813669809473183,
'f1': 0.008715802580154256,
'p': 0.004387950548493819,
'r': 0.6363636363636364},
{'a': 0.16205895830151215,
'f1': 0.005438723712835388,
'p': 0.0027373689973408414,
'r': 0.41338582677165353},
{'a': 0.18011344990858374,
'f1': 0.01901503253309401,
'p': 0.009674933645366592,
'r': 0.5494327390599676},
{'a': 0.2778094261516843,
'f1': 0.26039438372680956,
'p': 0.16656714932577002,
'r': 0.5962771642322765},
{'a': 0.16092582069327271,
'f1': 0.008880857105976506,
'p': 0.00447159755621994,
'r': 0.6370370370370371},
{'a': 0.15998775045935779,
'f1': 0.006005073251540094,
'p': 0.0030172189564584093,
'r': 0.6170212765957447},
{'a': 0.6367570029606012,
'f1': 0.7452483628813289,
'p': 0.6105731483904737,
'r': 0.9561475409836065},
{'a': 0.1612199008885105,
'f1': 0.00877147721995769,
'p': 0.004419832046382238,
'r': 0.568561872909699},
{'a': 0.16245613270266146,
'f1': 0.012084126086285804,
'p': 0.006097560975609756,
'r': 0.6638418079096046},
{'a': 0.2004397317079959,
'f1': 0.013935129569246611,
'p': 0.0070525291828793775,
'r': 0.5783475783475783},
{'a': 0.2062381175433279,
'f1': 0.031142799488800704,
'p': 0.015934746696035243,
'r': 0.6828908554572272},
{'a': 0.16165848336061103,
'f1': 0.00605417712348969,
'p': 0.003041884408392481,
'r': 0.6223404255319149},
{'a': 0.15965723553003572,
'f1': 0.0008338544923910778,
'p': 0.0004178636719770175,
'r': 0.18604651162790697},
{'a': 0.1593253750958274,
'f1': 0.004977574987685687,
'p': 0.0025000651058621317,
'r': 0.5517241379310345},
{'a': 0.20176191927306977,
'f1': 0.019598502892140185,
'p': 0.009972299168975069,
'r': 0.5647058823529412}]},
{'entertainment.msn.com;2000': [{'a': 0.812223206377325,
'f1': 0.009345794392523364,
'p': 0.03571428571428571,
'r': 0.005376344086021506},
{'a': 0.7823577906018137,
'f1': 0.050359712230215826,
'p': 0.21212121212121213,
'r': 0.02857142857142857},
{'a': 0.6307588075880759,
'f1': 0.02154398563734291,
'p': 0.18181818181818182,
'r': 0.011450381679389313},
{'a': 0.7637729549248747,
'f1': 0.1602373887240356,
'p': 0.2755102040816326,
'r': 0.11297071129707113},
{'a': 0.8342728297632469,
'f1': 0.5689149560117301,
'p': 0.776,
'r': 0.44907407407407407},
{'a': 0.7524271844660194,
'f1': 0.006493506493506493,
'p': 0.03571428571428571,
'r': 0.0035714285714285713},
{'a': 0.6017316017316018,
'f1': 0.028169014084507043,
'p': 0.24242424242424243,
'r': 0.014953271028037384},
{'a': 0.7774891774891775,
'f1': 0.14617940199335547,
'p': 0.23157894736842105,
'r': 0.10679611650485436},
{'a': 0.7298161470823341,
'f1': 0.27467811158798283,
'p': 0.46715328467153283,
'r': 0.1945288753799392},
{'a': 0.8880208333333334,
'f1': 0.689156626506024,
'p': 0.6299559471365639,
'r': 0.7606382978723404},
{'a': 0.6185714285714285,
'f1': 0.21238938053097345,
'p': 0.5142857142857142,
'r': 0.13382899628252787},
{'a': 0.8450704225352113,
'f1': 0.6348547717842324,
'p': 0.8406593406593407,
'r': 0.51},
{'a': 0.5781414994720169,
'f1': 0.32687447346251053,
'p': 0.8778280542986425,
'r': 0.20082815734989648},
{'a': 0.9233870967741935,
'f1': 0.05,
'p': 0.07142857142857142,
'r': 0.038461538461538464},
{'a': 0.7061252580867171,
'f1': 0.004662004662004663,
'p': 0.03571428571428571,
'r': 0.0024937655860349127},
{'a': 0.7023227383863081,
'f1': 0.24729520865533228,
'p': 0.7476635514018691,
'r': 0.14814814814814814},
{'a': 0.7808320950965825,
'f1': 0.2716049382716049,
'p': 0.6707317073170732,
'r': 0.17027863777089783},
{'a': 0.7888198757763976,
'f1': 0.3388888888888889,
'p': 0.4420289855072464,
'r': 0.2747747747747748},
{'a': 0.8339552238805971,
'f1': 0.38620689655172413,
'p': 0.448,
'r': 0.3393939393939394},
{'a': 0.7388932190179267,
'f1': 0.20047732696897372,
'p': 0.3783783783783784,
'r': 0.13636363636363635},
{'a': 0.6506309148264984,
'f1': 0.030634573304157548,
'p': 0.21212121212121213,
'r': 0.01650943396226415},
{'a': 0.7018813314037626,
'f1': 0.41310541310541316,
'p': 0.8430232558139535,
'r': 0.27358490566037735},
{'a': 0.7246932515337423,
'f1': 0.14319809069212408,
'p': 0.5263157894736842,
'r': 0.08287292817679558},
{'a': 0.652820830750155,
'f1': 0.08794788273615635,
'p': 0.5,
'r': 0.048214285714285716},
{'a': 0.36980859516070785,
'f1': 0.0022870211549456828,
'p': 0.05714285714285714,
'r': 0.0011668611435239206}],
'entertainment.msn.com;2005': [{'a': 0.37592867756315007,
'f1': 0.1006423982869379,
'p': 0.10904872389791183,
'r': 0.09343936381709742},
{'a': 0.5468451242829828,
'f1': 0.21000000000000002,
'p': 0.12401574803149606,
'r': 0.6847826086956522},
{'a': 0.49657064471879286,
'f1': 0.2338204592901879,
'p': 0.14213197969543148,
'r': 0.6588235294117647},
{'a': 0.4470827679782904,
'f1': 0.03550295857988166,
'p': 0.030927835051546393,
'r': 0.041666666666666664},
{'a': 0.38088341781317886,
'f1': 0.11764705882352941,
'p': 0.13013698630136986,
'r': 0.10734463276836158},
{'a': 0.44715447154471544,
'f1': 0.08724832214765102,
'p': 0.08041237113402062,
'r': 0.09535452322738386},
{'a': 0.38478915662650603,
'f1': 0.028537455410225922,
'p': 0.026143790849673203,
'r': 0.031413612565445025},
{'a': 0.35704419889502764,
'f1': 0.0881488736532811,
'p': 0.10638297872340426,
'r': 0.07525083612040134},
{'a': 0.5362622036262203,
'f1': 0.26681367144432194,
'p': 0.2335907335907336,
'r': 0.3110539845758355},
{'a': 0.2504059759662228,
'f1': 0.10611928737412858,
'p': 0.23909249563699825,
'r': 0.06819313091090094},
{'a': 0.21505376344086022,
'f1': 0.10870421425758173,
'p': 0.19193324061196107,
'r': 0.07582417582417582},
{'a': 0.4064602960969044,
'f1': 0.10183299389002037,
'p': 0.1016260162601626,
'r': 0.10204081632653061},
{'a': 0.38521677327647474,
'f1': 0.11824668705402651,
'p': 0.11462450592885376,
'r': 0.12210526315789473},
{'a': 0.33697632058287796,
'f1': 0.12183353437876962,
'p': 0.18738404452690166,
'r': 0.0902591599642538},
{'a': 0.5548780487804879,
'f1': 0.41948310139165007,
'p': 0.2776315789473684,
'r': 0.8577235772357723},
{'a': 0.5755258126195029,
'f1': 0.24489795918367346,
'p': 0.1610738255033557,
'r': 0.5106382978723404},
{'a': 0.38183217859892227,
'f1': 0.09470124013528748,
'p': 0.0988235294117647,
'r': 0.09090909090909091},
{'a': 0.5196540880503144,
'f1': 0.2769230769230769,
'p': 0.1685878962536023,
'r': 0.7748344370860927},
{'a': 0.44954128440366975,
'f1': 0.140625,
'p': 0.12471131639722864,
'r': 0.16119402985074627},
{'a': 0.5243619489559165,
'f1': 0.3940886699507389,
'p': 0.3436426116838488,
'r': 0.4618937644341801},
{'a': 0.5030581039755352,
'f1': 0.02694610778443114,
'p': 0.01775147928994083,
'r': 0.055900621118012424},
{'a': 0.44848035581912526,
'f1': 0.12470588235294118,
'p': 0.1062124248496994,
'r': 0.150997150997151},
{'a': 0.45838926174496647,
'f1': 0.024183796856106412,
'p': 0.02100840336134454,
'r': 0.02849002849002849},
{'a': 0.376491646778043,
'f1': 0.03863845446182153,
'p': 0.04794520547945205,
'r': 0.032357473035439135},
{'a': 0.44327573253193087,
'f1': 0.23843782117163412,
'p': 0.20530973451327433,
'r': 0.28431372549019607},
{'a': 0.5472118959107807,
'f1': 0.2564102564102564,
'p': 0.15486725663716813,
'r': 0.7446808510638298},
{'a': 0.3931034482758621,
'f1': 0.06976744186046512,
'p': 0.07333333333333333,
'r': 0.06653225806451613}],
'entertainment.msn.com;2010': [{'a': 0.4112582781456954,
'f1': 0.08066184074457083,
'p': 0.04640095181439619,
'r': 0.308300395256917},
{'a': 0.5111695137976346,
'f1': 0.13084112149532712,
'p': 0.10810810810810811,
'r': 0.16568047337278108},
{'a': 0.49666666666666665,
'f1': 0.17636363636363633,
'p': 0.15050426687354537,
'r': 0.21295279912184412},
{'a': 0.6666666666666666,
'f1': 0.009823182711198428,
'p': 0.03731343283582089,
'r': 0.005656108597285068},
{'a': 0.28802588996763756,
'f1': 0.08587257617728532,
'p': 0.05475097138820205,
'r': 0.19897304236200256},
{'a': 0.6051980198019802,
'f1': 0.030395136778115506,
'p': 0.02112676056338028,
'r': 0.05415162454873646},
{'a': 0.9060057197330791,
'f1': 0.13660245183887915,
'p': 0.14444444444444443,
'r': 0.12956810631229235},
{'a': 0.6293345829428304,
'f1': 0.2386910490856593,
'p': 0.25833333333333336,
'r': 0.22182468694096602},
{'a': 0.728719723183391,
'f1': 0.022443890274314215,
'p': 0.06716417910447761,
'r': 0.01347305389221557},
{'a': 0.7132943754565376,
'f1': 0.012578616352201259,
'p': 0.03759398496240601,
'r': 0.0075528700906344415},
{'a': 0.5807481751824818,
'f1': 0.14986123959296946,
'p': 0.18793503480278423,
'r': 0.12461538461538461},
{'a': 0.38456692913385826,
'f1': 0.054211035818005814,
'p': 0.03333333333333333,
'r': 0.14507772020725387},
{'a': 0.8707089874072796,
'f1': 0.18310626702997276,
'p': 0.27053140096618356,
'r': 0.13838550247116968},
{'a': 0.5450070323488045,
'f1': 0.14643799472295516,
'p': 0.10581506196377502,
'r': 0.23768736616702354},
{'a': 0.5846792801107522,
'f1': 0.030172413793103446,
'p': 0.03723404255319149,
'r': 0.025362318840579712},
{'a': 0.518960244648318,
'f1': 0.09649626651349799,
'p': 0.0830860534124629,
'r': 0.11506849315068493},
{'a': 0.4023054755043228,
'f1': 0.1372712146422629,
'p': 0.275,
'r': 0.09146341463414634},
{'a': 0.9010740118360488,
'f1': 0.15269086357947434,
'p': 0.2057335581787521,
'r': 0.12139303482587065},
{'a': 0.5685660613650595,
'f1': 0.10169491525423728,
'p': 0.07784431137724551,
'r': 0.14661654135338345},
{'a': 0.5740194010965837,
'f1': 0.1455160744500846,
'p': 0.19724770642201836,
'r': 0.11528150134048257},
{'a': 0.39776075427224517,
'f1': 0.11591695501730102,
'p': 0.20743034055727555,
'r': 0.08043217286914765},
{'a': 0.49329173166926676,
'f1': 0.14526315789473684,
'p': 0.12813370473537605,
'r': 0.16767922235722965},
{'a': 0.49485759493670883,
'f1': 0.08589835361488904,
'p': 0.13129102844638948,
'r': 0.06382978723404255},
{'a': 0.7530266343825666,
'f1': 0.2117465224111283,
'p': 0.2234910277324633,
'r': 0.2011747430249633},
{'a': 0.5652899457655403,
'f1': 0.12436974789915967,
'p': 0.15711252653927812,
'r': 0.10292072322670376}],
'entertainment.msn.com;2015': [{'a': 0.5695560658247225,
'f1': 0.054235862938826984,
'p': 0.03478964401294499,
'r': 0.12297426120114395},
{'a': 0.6012193861734009,
'f1': 0.017316017316017316,
'p': 0.009211595773503115,
'r': 0.1440677966101695},
{'a': 0.5828690807799443,
'f1': 0.036756260050539856,
'p': 0.021621621621621623,
'r': 0.1225114854517611},
{'a': 0.6018585441404234,
'f1': 0.020822752666328086,
'p': 0.011084076777507435,
'r': 0.17154811715481172},
{'a': 0.6023811967566458,
'f1': 0.020232675771370764,
'p': 0.010807889759524453,
'r': 0.15810276679841898},
{'a': 0.6039279869067103,
'f1': 0.019746835443037975,
'p': 0.010554803788903925,
'r': 0.15294117647058825},
{'a': 0.6229896732690029,
'f1': 0.03383947939262473,
'p': 0.020005129520389842,
'r': 0.10970464135021098},
{'a': 0.6025256184659973,
'f1': 0.010819165378670788,
'p': 0.0056772100567721,
'r': 0.11475409836065574},
{'a': 0.5996716244929496,
'f1': 0.024016953143395338,
'p': 0.013093709884467265,
'r': 0.14488636363636365},
{'a': 0.5860873113015096,
'f1': 0.015526443474041729,
'p': 0.00819672131147541,
'r': 0.14678899082568808},
{'a': 0.5629003558718861,
'f1': 0.037233000195963165,
'p': 0.020033741037536905,
'r': 0.2631578947368421},
{'a': 0.5972264708881411,
'f1': 0.020823004462072386,
'p': 0.011366711772665765,
'r': 0.12389380530973451},
{'a': 0.5611105621974113,
'f1': 0.034767492394611035,
'p': 0.019536019536019536,
'r': 0.15779092702169625},
{'a': 0.6134388123840219,
'f1': 0.02416173570019724,
'p': 0.013239664955417455,
'r': 0.13802816901408452},
{'a': 0.5971439308530627,
'f1': 0.019661636945587563,
'p': 0.010250297973778308,
'r': 0.24022346368715083},
{'a': 0.6003271537622683,
'f1': 0.04928664072632945,
'p': 0.029223276083055628,
'r': 0.15724137931034482},
{'a': 0.6016654369136714,
'f1': 0.011509285901124773,
'p': 0.005963675792897805,
'r': 0.16417910447761194},
{'a': 0.5162773243268599,
'f1': 0.05429200293470285,
'p': 0.030002027163997566,
'r': 0.28516377649325625},
{'a': 0.6075997708611801,
'f1': 0.02096236303001429,
'p': 0.011296534017971758,
'r': 0.14521452145214522},
{'a': 0.6170417724844557,
'f1': 0.044987988643808687,
'p': 0.02642380708055413,
'r': 0.1512481644640235},
{'a': 0.5557666214382633,
'f1': 0.06296508299942759,
'p': 0.04228600717580728,
'r': 0.12322628827483197},
{'a': 0.5885779772705568,
'f1': 0.033647375504710635,
'p': 0.01927525057825752,
'r': 0.13227513227513227},
{'a': 0.606974358974359,
'f1': 0.010841507485802787,
'p': 0.005668016194331984,
'r': 0.1242603550295858},
{'a': 0.6084449021627188,
'f1': 0.014004149377593362,
'p': 0.007317073170731708,
'r': 0.16265060240963855},
{'a': 0.5833571905716194,
'f1': 0.033643204957945996,
'p': 0.019477191184008202,
'r': 0.12337662337662338}],
'news.bbc.co.uk;2000': [{'a': 0.7297084318360915,
'f1': 0.14035087719298245,
'p': 0.509090909090909,
'r': 0.08139534883720931},
{'a': 0.7287157287157288,
'f1': 0.12962962962962962,
'p': 0.509090909090909,
'r': 0.07427055702917772},
{'a': 0.5329062317996506,
'f1': 0.054245283018867926,
'p': 0.46,
'r': 0.02882205513784461},
{'a': 0.5936420179682101,
'f1': 0.08695652173913042,
'p': 0.509090909090909,
'r': 0.04753820033955857},
{'a': 0.5630885122410546,
'f1': 0.04395604395604395,
'p': 0.37209302325581395,
'r': 0.02335766423357664},
{'a': 0.665596919127086,
'f1': 0.08112874779541446,
'p': 0.46,
'r': 0.04448742746615087},
{'a': 0.7130295763389288,
'f1': 0.12224938875305622,
'p': 0.4807692307692308,
'r': 0.0700280112044818},
{'a': 0.6924664602683178,
'f1': 0.1183431952662722,
'p': 0.425531914893617,
'r': 0.06872852233676977},
{'a': 0.7490664675130695,
'f1': 0.125,
'p': 0.48,
'r': 0.0718562874251497},
{'a': 0.6073260073260073,
'f1': 0.04964539007092199,
'p': 0.358974358974359,
'r': 0.02666666666666667},
{'a': 0.7364457831325302,
'f1': 0.19724770642201836,
'p': 0.6142857142857143,
'r': 0.11748633879781421},
{'a': 0.6092273485269594,
'f1': 0.0737812911725955,
'p': 0.5283018867924528,
'r': 0.039660056657223795},
{'a': 0.7618657937806874,
'f1': 0.13134328358208958,
'p': 0.4489795918367347,
'r': 0.07692307692307693},
{'a': 0.6421568627450981,
'f1': 0.10429447852760738,
'p': 0.5862068965517241,
'r': 0.05723905723905724},
{'a': 0.6870799103808812,
'f1': 0.10660980810234541,
'p': 0.4807692307692308,
'r': 0.05995203836930456},
{'a': 0.6457219251336899,
'f1': 0.05357142857142857,
'p': 0.35714285714285715,
'r': 0.02895752895752896},
{'a': 0.6792323509252913,
'f1': 0.09302325581395349,
'p': 0.47058823529411764,
'r': 0.05161290322580645},
{'a': 0.5922155688622754,
'f1': 0.07598371777476255,
'p': 0.5185185185185185,
'r': 0.040995607613469986},
{'a': 0.5509181969949917,
'f1': 0.06488991888760139,
'p': 0.509090909090909,
'r': 0.034653465346534656},
{'a': 0.6133184107442641,
'f1': 0.06241519674355495,
'p': 0.46,
'r': 0.033478893740902474},
{'a': 0.6303780964797914,
'f1': 0.05970149253731344,
'p': 0.4090909090909091,
'r': 0.03220035778175313},
{'a': 0.6047648136835675,
'f1': 0.06095791001451379,
'p': 0.45652173913043476,
'r': 0.03265940902021773},
{'a': 0.6047745358090185,
'f1': 0.1051051051051051,
'p': 0.5645161290322581,
'r': 0.057947019867549666},
{'a': 0.7057971014492753,
'f1': 0.16115702479338842,
'p': 0.6,
'r': 0.09307875894988067},
{'a': 0.6753333333333333,
'f1': 0.11934900542495479,
'p': 0.559322033898305,
'r': 0.06680161943319839}],
'news.bbc.co.uk;2005': [{'a': 0.7814885496183206,
'f1': 0.254071661237785,
'p': 0.5342465753424658,
'r': 0.16666666666666666},
{'a': 0.7305533279871692,
'f1': 0.09189189189189188,
'p': 0.3333333333333333,
'r': 0.05329153605015674},
{'a': 0.6317321688500728,
'f1': 0.10283687943262412,
'p': 0.4603174603174603,
'r': 0.05788423153692615},
{'a': 0.6617071260767423,
'f1': 0.15625,
'p': 0.5555555555555556,
'r': 0.09090909090909091},
{'a': 0.5203125,
'f1': 0.05149330587023687,
'p': 0.38461538461538464,
'r': 0.02759381898454746},
{'a': 0.5657030223390276,
'f1': 0.029368575624082228,
'p': 0.22727272727272727,
'r': 0.015698587127158554},
{'a': 0.44362383062530775,
'f1': 0.047217537942664416,
'p': 0.5185185185185185,
'r': 0.024734982332155476},
{'a': 0.6749090909090909,
'f1': 0.10420841683366734,
'p': 0.43333333333333335,
'r': 0.05922551252847381},
{'a': 0.7581047381546134,
'f1': 0.18941504178272978,
'p': 0.5074626865671642,
'r': 0.11643835616438356},
{'a': 0.5664174813110984,
'f1': 0.045569620253164564,
'p': 0.36,
'r': 0.024324324324324326},
{'a': 0.6458944281524927,
'f1': 0.1170018281535649,
'p': 0.49230769230769234,
'r': 0.06639004149377593},
{'a': 0.725705329153605,
'f1': 0.17452830188679247,
'p': 0.5211267605633803,
'r': 0.1048158640226629},
{'a': 0.6671511627906976,
'f1': 0.15498154981549814,
'p': 0.56,
'r': 0.08993576017130621},
{'a': 0.7111111111111111,
'f1': 0.18750000000000003,
'p': 0.56,
'r': 0.1126005361930295},
{'a': 0.741726618705036,
'f1': 0.04774535809018568,
'p': 0.2571428571428571,
'r': 0.02631578947368421},
{'a': 0.7377049180327869,
'f1': 0.17098445595854922,
'p': 0.4852941176470588,
'r': 0.10377358490566038},
{'a': 0.7497949138638228,
'f1': 0.1456582633053221,
'p': 0.43333333333333335,
'r': 0.08754208754208755},
{'a': 0.5729103111653447,
'f1': 0.07894736842105261,
'p': 0.4838709677419355,
'r': 0.04297994269340974},
{'a': 0.6473520249221184,
'f1': 0.09872611464968153,
'p': 0.09717868338557993,
'r': 0.10032362459546926},
{'a': 0.5315634218289086,
'f1': 0.04567307692307692,
'p': 0.3584905660377358,
'r': 0.024390243902439025},
{'a': 0.42221362229102166,
'f1': 0.03490627020038785,
'p': 0.54,
'r': 0.018036072144288578},
{'a': 0.7912457912457912,
'f1': 0.0948905109489051,
'p': 0.325,
'r': 0.05555555555555555},
{'a': 0.5580140313005937,
'f1': 0.061855670103092786,
'p': 0.5094339622641509,
'r': 0.032926829268292684},
{'a': 0.565121412803532,
'f1': 0.06635071090047394,
'p': 0.08888888888888889,
'r': 0.052930056710775046},
{'a': 0.6344993968636912,
'f1': 0.10089020771513352,
'p': 0.5666666666666667,
'r': 0.05537459283387622}],
'news.bbc.co.uk;2010': [{'a': 0.5452006980802793,
'f1': 0.03837638376383764,
'p': 0.05726872246696035,
'r': 0.02885682574916759},
{'a': 0.6417272338606242,
'f1': 0.027842227378190254,
'p': 0.026490066225165563,
'r': 0.029339853300733496},
{'a': 0.6973148901545972,
'f1': 0.05583756345177666,
'p': 0.04721030042918455,
'r': 0.06832298136645963},
{'a': 0.656056221579165,
'f1': 0.07142857142857142,
'p': 0.06926406926406926,
'r': 0.07373271889400922},
{'a': 0.7386526516961299,
'f1': 0.09286898839137644,
'p': 0.06073752711496746,
'r': 0.19718309859154928},
{'a': 0.6303692539562924,
'f1': 0.035398230088495575,
'p': 0.040268456375838924,
'r': 0.031578947368421054},
{'a': 0.7012601927353596,
'f1': 0.10643015521064302,
'p': 0.06233766233766234,
'r': 0.36363636363636365},
{'a': 0.6930131004366812,
'f1': 0.04871447902571042,
'p': 0.03991130820399113,
'r': 0.0625},
{'a': 0.7025939177101967,
'f1': 0.0620592383638928,
'p': 0.04835164835164835,
'r': 0.08661417322834646},
{'a': 0.5305662805662805,
'f1': 0.048271363339856495,
'p': 0.08061002178649238,
'r': 0.03445065176908752},
{'a': 0.6972773419473927,
'f1': 0.0708215297450425,
'p': 0.05470459518599562,
'r': 0.10040160642570281},
{'a': 0.6075441412520064,
'f1': 0.043052837573385516,
'p': 0.049107142857142856,
'r': 0.03832752613240418},
{'a': 0.6874451273046532,
'f1': 0.058201058201058205,
'p': 0.04741379310344827,
'r': 0.07534246575342465},
{'a': 0.6854082998661312,
'f1': 0.08083441981747067,
'p': 0.06553911205073996,
'r': 0.1054421768707483},
{'a': 0.6903729401561145,
'f1': 0.07272727272727272,
'p': 0.0462046204620462,
'r': 0.17073170731707318},
{'a': 0.601063829787234,
'f1': 0.06483790523690773,
'p': 0.08533916849015317,
'r': 0.05227882037533512},
{'a': 0.6620473848904783,
'f1': 0.05735660847880299,
'p': 0.05088495575221239,
'r': 0.06571428571428571},
{'a': 0.6549815498154982,
'f1': 0.020942408376963352,
'p': 0.022123893805309734,
'r': 0.019880715705765408},
{'a': 0.639269406392694,
'f1': 0.05389221556886228,
'p': 0.05947136563876652,
'r': 0.04927007299270073},
{'a': 0.5706154393454287,
'f1': 0.12090313182811364,
'p': 0.16435643564356436,
'r': 0.0956221198156682},
{'a': 0.5213270142180095,
'f1': 0.058588548601864174,
'p': 0.09166666666666666,
'r': 0.043052837573385516},
{'a': 0.6842568161829375,
'f1': 0.052770448548812667,
'p': 0.044444444444444446,
'r': 0.06493506493506493},
{'a': 0.572356549981956,
'f1': 0.07638347622759158,
'p': 0.1044776119402985,
'r': 0.060196560196560195},
{'a': 0.6512922465208748,
'f1': 0.03732162458836443,
'p': 0.0367170626349892,
'r': 0.03794642857142857},
{'a': 0.6903001304915181,
'f1': 0.06068601583113456,
'p': 0.050997782705099776,
'r': 0.0749185667752443}],
'news.bbc.co.uk;2015': [{'a': 0.5385607798165137,
'f1': 0.06992198786477896,
'p': 0.04235211760588029,
'r': 0.20033112582781457},
{'a': 0.5438546287022464,
'f1': 0.051190476190476196,
'p': 0.029965156794425088,
'r': 0.17551020408163265},
{'a': 0.524445564516129,
'f1': 0.07771260997067449,
'p': 0.055381400208986416,
'r': 0.13022113022113022},
{'a': 0.5521681219476099,
'f1': 0.06024844720496894,
'p': 0.033692254254949636,
'r': 0.2844574780058651},
{'a': 0.5129887520085699,
'f1': 0.0752606153063819,
'p': 0.051766351871283665,
'r': 0.1378026070763501},
{'a': 0.5508787476000591,
'f1': 0.054709356543363376,
'p': 0.03172314347512617,
'r': 0.1986455981941309},
{'a': 0.548373348487001,
'f1': 0.05527488855869243,
'p': 0.032426778242677826,
'r': 0.18712273641851107},
{'a': 0.5480490831380118,
'f1': 0.07348784624081402,
'p': 0.045359385903698535,
'r': 0.19345238095238096},
{'a': 0.549349076597033,
'f1': 0.052815781100859056,
'p': 0.028859527121001392,
'r': 0.31086142322097376},
{'a': 0.5514515883872929,
'f1': 0.07463154593916588,
'p': 0.0429138117562207,
'r': 0.2860576923076923},
{'a': 0.48181678686512197,
'f1': 0.030989956958393117,
'p': 0.016279770877298764,
'r': 0.32142857142857145},
{'a': 0.5531212484993998,
'f1': 0.04059278350515465,
'p': 0.02171664943123061,
'r': 0.3103448275862069},
{'a': 0.5388310648518815,
'f1': 0.06846361185983828,
'p': 0.04411253907606808,
'r': 0.15282791817087846},
{'a': 0.5498312050491707,
'f1': 0.05368713360074051,
'p': 0.03035589672016748,
'r': 0.232},
{'a': 0.547472433051697,
'f1': 0.05558876270173342,
'p': 0.032449406838799724,
'r': 0.19375},
{'a': 0.5538528896672504,
'f1': 0.050326188257222744,
'p': 0.028351417570878543,
'r': 0.22375690607734808},
{'a': 0.5556650246305419,
'f1': 0.0733783387144115,
'p': 0.045653761869978084,
'r': 0.18684603886397608},
{'a': 0.5337496678182302,
'f1': 0.06997084548104957,
'p': 0.04597701149425287,
'r': 0.14634146341463414},
{'a': 0.5289724796017148,
'f1': 0.0754614549402823,
'p': 0.04848273456574817,
'r': 0.1701346389228886},
{'a': 0.5397741852707211,
'f1': 0.07144706186901371,
'p': 0.0438512869399428,
'r': 0.19273743016759776},
{'a': 0.5394017831463905,
'f1': 0.0702467343976778,
'p': 0.04247104247104247,
'r': 0.20302013422818793},
{'a': 0.5484760722658943,
'f1': 0.04825581395348838,
'p': 0.02896022330774599,
'r': 0.1445993031358885},
{'a': 0.5238552234713463,
'f1': 0.06915036183328867,
'p': 0.045326774420238934,
'r': 0.14576271186440679},
{'a': 0.5412545916925685,
'f1': 0.06775768016078093,
'p': 0.04143258426966292,
'r': 0.1858267716535433},
{'a': 0.5600315955766193,
'f1': 0.05912162162162162,
'p': 0.03379465722561957,
'r': 0.23595505617977527}],
'news.yahoo.com;2000': [{'a': 0.48945147679324896,
'f1': 0.1577726218097448,
'p': 0.723404255319149,
'r': 0.08854166666666667},
{'a': 0.5880971025841817,
'f1': 0.1054421768707483,
'p': 0.5344827586206896,
'r': 0.05849056603773585},
{'a': 0.5663461538461538,
'f1': 0.05052631578947368,
'p': 0.3076923076923077,
'r': 0.027522935779816515},
{'a': 0.6125933831376734,
'f1': 0.02680965147453083,
'p': 0.1,
'r': 0.015479876160990712},
{'a': 0.612027158098933,
'f1': 0.15611814345991562,
'p': 0.578125,
'r': 0.09024390243902439},
{'a': 0.6062683643486778,
'f1': 0.14102564102564102,
'p': 0.55,
'r': 0.08088235294117647},
{'a': 0.3782771535580524,
'f1': 0.047345767575322814,
'p': 0.42857142857142855,
'r': 0.025056947608200455},
{'a': 0.5068807339449541,
'f1': 0.06764960971379011,
'p': 0.47560975609756095,
'r': 0.036414565826330535},
{'a': 0.5685714285714286,
'f1': 0.07814407814407814,
'p': 0.43243243243243246,
'r': 0.042953020134228186},
{'a': 0.6121629374641423,
'f1': 0.08401084010840108,
'p': 0.543859649122807,
'r': 0.04552129221732746},
{'a': 0.6037914691943128,
'f1': 0.009478672985781991,
'p': 0.06451612903225806,
'r': 0.005115089514066497},
{'a': 0.4222365038560411,
'f1': 0.04665959703075291,
'p': 0.4583333333333333,
'r': 0.024581005586592177},
{'a': 0.6623889437314906,
'f1': 0.14925373134328357,
'p': 0.4,
'r': 0.09174311926605505},
{'a': 0.55,
'f1': 0.0425531914893617,
'p': 0.20754716981132076,
'r': 0.023706896551724137},
{'a': 0.36500242365487157,
'f1': 0.07876230661040788,
'p': 0.47058823529411764,
'r': 0.042977743668457406},
{'a': 0.5662154208357858,
'f1': 0.03153745072273324,
'p': 0.3157894736842105,
'r': 0.016597510373443983},
{'a': 0.680379746835443,
'f1': 0.0955223880597015,
'p': 0.37209302325581395,
'r': 0.0547945205479452},
{'a': 0.7199046483909416,
'f1': 0.16961130742049468,
'p': 0.3582089552238806,
'r': 0.1111111111111111},
{'a': 0.6143277723258096,
'f1': 0.13626373626373625,
'p': 0.543859649122807,
'r': 0.07788944723618091},
{'a': 0.6746532156368222,
'f1': 0.16504854368932037,
'p': 0.4473684210526316,
'r': 0.10119047619047619},
{'a': 0.8265947888589398,
'f1': 0.18565400843881857,
'p': 0.3333333333333333,
'r': 0.1286549707602339},
{'a': 0.5793515358361775,
'f1': 0.08872458410351201,
'p': 0.3582089552238806,
'r': 0.05063291139240506},
{'a': 0.6908690869086909,
'f1': 0.17109144542772864,
'p': 0.3918918918918919,
'r': 0.10943396226415095},
{'a': 0.48323170731707316,
'f1': 0.11023622047244094,
'p': 0.49411764705882355,
'r': 0.0620384047267356},
{'a': 0.5053956834532374,
'f1': 0.26861702127659576,
'p': 0.7651515151515151,
'r': 0.1629032258064516}],
'news.yahoo.com;2005': [{'a': 0.5161135161135161,
'f1': 0.0936936936936937,
'p': 0.17391304347826086,
'r': 0.06411837237977805},
{'a': 0.5869448903156769,
'f1': 0.15164835164835166,
'p': 0.22258064516129034,
'r': 0.115},
{'a': 0.46435361216730037,
'f1': 0.06936416184971099,
'p': 0.14893617021276595,
'r': 0.04520990312163617},
{'a': 0.5981830887491265,
'f1': 0.1958041958041958,
'p': 0.22508038585209003,
'r': 0.17326732673267325},
{'a': 0.677547770700637,
'f1': 0.25688073394495414,
'p': 0.22012578616352202,
'r': 0.30837004405286345},
{'a': 0.5543710021321961,
'f1': 0.09784172661870504,
'p': 0.13765182186234817,
'r': 0.07589285714285714},
{'a': 0.6569250317662008,
'f1': 0.10299003322259136,
'p': 0.12601626016260162,
'r': 0.08707865168539326},
{'a': 0.5815691158156912,
'f1': 0.05084745762711864,
'p': 0.07894736842105263,
'r': 0.0375},
{'a': 0.6695601851851852,
'f1': 0.143928035982009,
'p': 0.4067796610169492,
'r': 0.08743169398907104},
{'a': 0.4588893671975893,
'f1': 0.10533807829181495,
'p': 0.23948220064724918,
'r': 0.06751824817518248},
{'a': 0.686013986013986,
'f1': 0.11787819253438114,
'p': 0.12345679012345678,
'r': 0.11278195488721804},
{'a': 0.4600052452137425,
'f1': 0.021852731591448932,
'p': 0.03776683087027915,
'r': 0.01537433155080214},
{'a': 0.524135446685879,
'f1': 0.035062089116143176,
'p': 0.26373626373626374,
'r': 0.018779342723004695},
{'a': 0.3388075687254552,
'f1': 0.05606523955147808,
'p': 0.1757188498402556,
'r': 0.03335354760460885},
{'a': 0.3365617433414044,
'f1': 0.0935727788279773,
'p': 0.28530259365994237,
'r': 0.05596382136800452},
{'a': 0.43752594437525943,
'f1': 0.05706332637439109,
'p': 0.13183279742765272,
'r': 0.03641207815275311},
{'a': 0.729431721798134,
'f1': 0.22384428223844283,
'p': 0.17692307692307693,
'r': 0.304635761589404},
{'a': 0.5936619718309859,
'f1': 0.10819165378670788,
'p': 0.12589928057553956,
'r': 0.0948509485094851},
{'a': 0.461151481274455,
'f1': 0.12839059674502712,
'p': 0.2572463768115942,
'r': 0.0855421686746988},
{'a': 0.42585551330798477,
'f1': 0.12842712842712842,
'p': 0.27300613496932513,
'r': 0.08396226415094339},
{'a': 0.6816171825647505,
'f1': 0.15151515151515152,
'p': 0.15625,
'r': 0.14705882352941177},
{'a': 0.5503685503685504,
'f1': 0.1107871720116618,
'p': 0.1993006993006993,
'r': 0.07671601615074024},
{'a': 0.45257196790939125,
'f1': 0.10631741140215716,
'p': 0.25461254612546125,
'r': 0.06718597857838364},
{'a': 0.705607476635514,
'f1': 0.1908256880733945,
'p': 0.19622641509433963,
'r': 0.18571428571428572},
{'a': 0.4696485623003195,
'f1': 0.14809384164222875,
'p': 0.30149253731343284,
'r': 0.09815354713313897}],
'news.yahoo.com;2010': [{'a': 0.6014760147601476,
'f1': 0.11330049261083744,
'p': 0.18699186991869918,
'r': 0.0812720848056537},
{'a': 0.7451756556160317,
'f1': 0.15156507413509063,
'p': 0.13031161473087818,
'r': 0.18110236220472442},
{'a': 0.6688659793814433,
'f1': 0.13933547695605575,
'p': 0.17333333333333334,
'r': 0.11648745519713262},
{'a': 0.6262395118230358,
'f1': 0.14335664335664336,
'p': 0.2192513368983957,
'r': 0.10649350649350649},
{'a': 0.5593162393162393,
'f1': 0.11286992429456297,
'p': 0.21866666666666668,
'r': 0.07606679035250463},
{'a': 0.714868540344515,
'f1': 0.23572296476306198,
'p': 0.24129353233830847,
'r': 0.23040380047505937},
{'a': 0.6044968669369701,
'f1': 0.13257881972514146,
'p': 0.21635883905013192,
'r': 0.09557109557109557},
{'a': 0.6787878787878788,
'f1': 0.1292442497261774,
'p': 0.1634349030470914,
'r': 0.1068840579710145},
{'a': 0.5697632058287796,
'f1': 0.09640397857689366,
'p': 0.17403314917127072,
'r': 0.06666666666666667},
{'a': 0.7275541795665634,
'f1': 0.14917127071823205,
'p': 0.15,
'r': 0.14835164835164835},
{'a': 0.7713068181818182,
'f1': 0.15706806282722513,
'p': 0.12640449438202248,
'r': 0.2073732718894009},
{'a': 0.7508771929824561,
'f1': 0.15905245346869715,
'p': 0.1298342541436464,
'r': 0.2052401746724891},
{'a': 0.47737955346651,
'f1': 0.06712113266911379,
'p': 0.18658892128279883,
'r': 0.04092071611253197},
{'a': 0.751145038167939,
'f1': 0.09276437847866419,
'p': 0.06702412868632708,
'r': 0.15060240963855423},
{'a': 0.5854922279792746,
'f1': 0.10979228486646883,
'p': 0.2005420054200542,
'r': 0.0755873340143003},
{'a': 0.6485148514851485,
'f1': 0.15976331360946747,
'p': 0.21148825065274152,
'r': 0.12836767036450078},
{'a': 0.6649958228905597,
'f1': 0.15932914046121593,
'p': 0.19895287958115182,
'r': 0.13286713286713286},
{'a': 0.6886993603411514,
'f1': 0.14319248826291078,
'p': 0.1685082872928177,
'r': 0.12448979591836734},
{'a': 0.6681651001225991,
'f1': 0.1524008350730689,
'p': 0.1994535519125683,
'r': 0.12331081081081081},
{'a': 0.6484375,
'f1': 0.13549039433771487,
'p': 0.18157181571815717,
'r': 0.10806451612903226},
{'a': 0.6577026507875529,
'f1': 0.1407907425265188,
'p': 0.19571045576407506,
'r': 0.10993975903614457},
{'a': 0.582679971489665,
'f1': 0.12807148175725988,
'p': 0.22872340425531915,
'r': 0.0889348500517063},
{'a': 0.6080516521078617,
'f1': 0.12095400340715501,
'p': 0.19293478260869565,
'r': 0.0880893300248139},
{'a': 0.5877917414721724,
'f1': 0.09033280507131539,
'p': 0.1601123595505618,
'r': 0.06291390728476821},
{'a': 0.7503404448479346,
'f1': 0.1791044776119403,
'p': 0.16129032258064516,
'r': 0.20134228187919462}],
'news.yahoo.com;2015': [{'a': 0.30976348310009,
'f1': 0.026396798522394953,
'p': 0.013624086431522084,
'r': 0.4224137931034483},
{'a': 0.34896526041389586,
'f1': 0.019784532820697663,
'p': 0.010120876869493957,
'r': 0.4379432624113475},
{'a': 0.2651006180938681,
'f1': 0.013328522613918543,
'p': 0.006774395374425579,
'r': 0.40992167101827676},
{'a': 0.34806191950464394,
'f1': 0.03355852548098105,
'p': 0.0175015318627451,
'r': 0.40658362989323843},
{'a': 0.3922373577159918,
'f1': 0.02650194281159709,
'p': 0.013609618828344845,
'r': 0.502835538752363},
{'a': 0.337158920926394,
'f1': 0.04439486893474623,
'p': 0.023516899078232095,
'r': 0.3956262425447316},
{'a': 0.3539656156193406,
'f1': 0.03669589020826397,
'p': 0.019219591583678846,
'r': 0.40456769983686786},
{'a': 0.35384655315614616,
'f1': 0.03601161665053243,
'p': 0.018890920170627667,
'r': 0.384297520661157},
{'a': 0.27326607247884904,
'f1': 0.024059784920104504,
'p': 0.012396952937493478,
'r': 0.4062927496580027},
{'a': 0.3965652323795794,
'f1': 0.028555572372735986,
'p': 0.014699007946813484,
'r': 0.4982394366197183},
{'a': 0.2938151162482437,
'f1': 0.008412745681953543,
'p': 0.004241423316567825,
'r': 0.509009009009009},
{'a': 0.3456732767684671,
'f1': 0.027121355340652947,
'p': 0.014010968372023138,
'r': 0.42194570135746606},
{'a': 0.36450549727023424,
'f1': 0.031665708261452936,
'p': 0.016507454334705622,
'r': 0.3874296435272045},
{'a': 0.3381643778173813,
'f1': 0.03263742578050182,
'p': 0.01696535244922342,
'r': 0.42814070351758793},
{'a': 0.260559811175951,
'f1': 0.021799513437925026,
'p': 0.011189553794418989,
'r': 0.4208579881656805},
{'a': 0.3380529490616622,
'f1': 0.01651383760009958,
'p': 0.008423993565592855,
'r': 0.41631799163179917},
{'a': 0.3454445909961924,
'f1': 0.024768261030774934,
'p': 0.012762705387848681,
'r': 0.4175},
{'a': 0.2671809594072268,
'f1': 0.011081291663059474,
'p': 0.005609852303107332,
'r': 0.44912280701754387},
{'a': 0.33638386717675595,
'f1': 0.02698342865522804,
'p': 0.01391935849913004,
'r': 0.43914081145584727},
{'a': 0.3538612912178016,
'f1': 0.029481981128500506,
'p': 0.015296291927175494,
'r': 0.40605427974947805},
{'a': 0.3345380105135463,
'f1': 0.015847505139226312,
'p': 0.008063289213448958,
'r': 0.45788336933045354},
{'a': 0.2785684183226894,
'f1': 0.016832339297548043,
'p': 0.008598122404766203,
'r': 0.3977035490605428},
{'a': 0.35007815830609634,
'f1': 0.04873990362949354,
'p': 0.025945746447684076,
'r': 0.4012557077625571},
{'a': 0.3539122516169706,
'f1': 0.04507178354500276,
'p': 0.023857367286544154,
'r': 0.40685358255451715},
{'a': 0.26764944972486243,
'f1': 0.009598105748292847,
'p': 0.004849287560616094,
'r': 0.463265306122449}],
'thenation.com;2000': [{'a': 0.28691392582225334,
'f1': 0.0039100684261974585,
'p': 0.06666666666666667,
'r': 0.002014098690835851},
{'a': 0.39705882352941174,
'f1': 0.1560130010834236,
'p': 0.5294117647058824,
'r': 0.09148665819567979},
{'a': 0.4818258664412511,
'f1': 0.19448094612352168,
'p': 0.5138888888888888,
'r': 0.11993517017828201},
{'a': 0.19546247818499127,
'f1': 0.08592200925313946,
'p': 0.6598984771573604,
'r': 0.045952633439377874},
{'a': 0.4738442822384428,
'f1': 0.17066155321188878,
'p': 0.5668789808917197,
'r': 0.10045146726862303},
{'a': 0.4803098773402195,
'f1': 0.3460601137286759,
'p': 0.7580071174377224,
'r': 0.22421052631578947},
{'a': 0.14657568865301995,
'f1': 0.016312263326536555,
'p': 0.3076923076923077,
'r': 0.008378216636744464},
{'a': 0.17392398691165367,
'f1': 0.07705286839145106,
'p': 0.6748768472906403,
'r': 0.04085893229943334},
{'a': 0.32454856027330403,
'f1': 0.18779342723004697,
'p': 0.6956521739130435,
'r': 0.10854816824966079},
{'a': 0.47415185783521807,
'f1': 0.31545741324921134,
'p': 0.704225352112676,
'r': 0.2032520325203252},
{'a': 0.235950044603033,
'f1': 0.03709949409780776,
'p': 0.32038834951456313,
'r': 0.019689737470167064},
{'a': 0.4800783801436969,
'f1': 0.26296296296296295,
'p': 0.6826923076923077,
'r': 0.1628440366972477},
{'a': 0.8025641025641026,
'f1': 0.536144578313253,
'p': 0.7672413793103449,
'r': 0.41203703703703703},
{'a': 0.7985347985347986,
'f1': 0.5217391304347826,
'p': 0.7692307692307693,
'r': 0.39473684210526316},
{'a': 0.4087403598971722,
'f1': 0.3164933135215453,
'p': 0.7661870503597122,
'r': 0.199438202247191},
{'a': 0.414780292942743,
'f1': 0.17464788732394365,
'p': 0.577639751552795,
'r': 0.10287610619469026},
{'a': 0.35508155583437895,
'f1': 0.20923076923076925,
'p': 0.6868686868686869,
'r': 0.12341197822141561},
{'a': 0.23674752920035938,
'f1': 0.11923276308968378,
'p': 0.8214285714285714,
'r': 0.06428172163219675},
{'a': 0.39259748843357567,
'f1': 0.17430368373764601,
'p': 0.5773809523809523,
'r': 0.10264550264550265},
{'a': 0.4642857142857143,
'f1': 0.2918454935622318,
'p': 0.6834170854271356,
'r': 0.1855388813096862},
{'a': 0.12883435582822086,
'f1': 0.004155844155844157,
'p': 0.25806451612903225,
'r': 0.002094789211835559},
{'a': 0.076158940397351,
'f1': 0.005702066999287241,
'p': 0.24691358024691357,
'r': 0.002884338044418806},
{'a': 0.30004847309743093,
'f1': 0.21521739130434778,
'p': 0.75,
'r': 0.12563451776649745},
{'a': 0.3960273122284296,
'f1': 0.2031122031122031,
'p': 0.6492146596858639,
'r': 0.1203883495145631},
{'a': 0.37623762376237624,
'f1': 0.1936,
'p': 0.8175675675675675,
'r': 0.10980036297640654}],
'thenation.com;2005': [{'a': 0.302190988011575,
'f1': 0.007058823529411765,
'p': 0.09375,
'r': 0.003667481662591687},
{'a': 0.5588499550763701,
'f1': 0.02385685884691849,
'p': 0.125,
'r': 0.013186813186813187},
{'a': 0.28757319453480806,
'f1': 0.0144014401440144,
'p': 0.16666666666666666,
'r': 0.007525870178739417},
{'a': 0.20452446906740535,
'f1': 0.011474469305794606,
'p': 0.15384615384615385,
'r': 0.0059594755661501785},
{'a': 0.5150992234685073,
'f1': 0.020905923344947737,
'p': 0.12,
'r': 0.011450381679389313},
{'a': 0.40779014308426076,
'f1': 0.1162514827995255,
'p': 0.4537037037037037,
'r': 0.06666666666666667},
{'a': 0.36363636363636365,
'f1': 0.013986013986013986,
'p': 0.109375,
'r': 0.007470651013874066},
{'a': 0.45148247978436656,
'f1': 0.187624750499002,
'p': 0.5802469135802469,
'r': 0.11190476190476191},
{'a': 0.3495720868992758,
'f1': 0.021782178217821784,
'p': 0.171875,
'r': 0.011627906976744186},
{'a': 0.4151530877010898,
'f1': 0.01572052401746725,
'p': 0.13043478260869565,
'r': 0.008364312267657992},
{'a': 0.30533683289588803,
'f1': 0.014888337468982632,
'p': 0.125,
'r': 0.0079155672823219},
{'a': 0.3910776361529548,
'f1': 0.04541326067211626,
'p': 0.3125,
'r': 0.024485798237022526},
{'a': 0.4530005310674456,
'f1': 0.23590504451038577,
'p': 0.7098214285714286,
'r': 0.14145907473309607},
{'a': 0.34989043097151207,
'f1': 0.019823788546255505,
'p': 0.17647058823529413,
'r': 0.010501750291715286},
{'a': 0.2568627450980392,
'f1': 0.017286084701815037,
'p': 0.20833333333333334,
'r': 0.009017132551848512},
{'a': 0.1257568700512343,
'f1': 0.0074034902168165,
'p': 0.14583333333333334,
'r': 0.0037981551817688553},
{'a': 0.48153730218538054,
'f1': 0.16301703163017034,
'p': 0.5114503816793893,
'r': 0.09696092619392185},
{'a': 0.4892984542211653,
'f1': 0.3842293906810036,
'p': 0.8072289156626506,
'r': 0.2521166509877705},
{'a': 0.502262443438914,
'f1': 0.2124105011933174,
'p': 0.48633879781420764,
'r': 0.13587786259541984},
{'a': 0.4963308872581721,
'f1': 0.22564102564102564,
'p': 0.625,
'r': 0.1376720901126408},
{'a': 0.4722388059701493,
'f1': 0.17996289424860853,
'p': 0.5987654320987654,
'r': 0.10589519650655022},
{'a': 0.4375,
'f1': 0.1818181818181818,
'p': 0.6140350877192983,
'r': 0.10670731707317073},
{'a': 0.5504469987228607,
'f1': 0.2974051896207585,
'p': 0.6995305164319249,
'r': 0.1888466413181242},
{'a': 0.46401308615049075,
'f1': 0.2001627339300244,
'p': 0.6648648648648648,
'r': 0.11781609195402298},
{'a': 0.32565284178187404,
'f1': 0.05183585313174947,
'p': 0.3287671232876712,
'r': 0.02813599062133646}],
'thenation.com;2010': [{'a': 0.6892906815020863,
'f1': 0.2354551676933607,
'p': 0.4648648648648649,
'r': 0.15765352887259396},
{'a': 0.6797658862876255,
'f1': 0.1545253863134658,
'p': 0.3547297297297297,
'r': 0.09877704609595485},
{'a': 0.6638566912539515,
'f1': 0.06176470588235294,
'p': 0.16279069767441862,
'r': 0.038112522686025406},
{'a': 0.6476302619958787,
'f1': 0.14438884917798428,
'p': 0.24455205811138014,
'r': 0.10243407707910751},
{'a': 0.5886092715231788,
'f1': 0.021424070573408952,
'p': 0.07657657657657657,
'r': 0.012454212454212455},
{'a': 0.7708074534161491,
'f1': 0.016,
'p': 0.012552301255230125,
'r': 0.022058823529411766},
{'a': 0.7546113306982872,
'f1': 0.053367217280813214,
'p': 0.09012875536480687,
'r': 0.03790613718411552},
{'a': 0.7518939393939394,
'f1': 0.08739708676377454,
'p': 0.18110236220472442,
'r': 0.05759599332220367},
{'a': 0.39182982837805175,
'f1': 0.009448818897637795,
'p': 0.056338028169014086,
'r': 0.005156854318865492},
{'a': 0.5875608061153579,
'f1': 0.016570008285004142,
'p': 0.04716981132075472,
'r': 0.010050251256281407},
{'a': 0.6878125,
'f1': 0.14395886889460152,
'p': 0.2978723404255319,
'r': 0.09491525423728814},
{'a': 0.5683652875882946,
'f1': 0.06655755591925805,
'p': 0.1425233644859813,
'r': 0.04341637010676157},
{'a': 0.635335984927779,
'f1': 0.10390946502057614,
'p': 0.3389261744966443,
'r': 0.06136087484811665},
{'a': 0.7242016363156506,
'f1': 0.1619887730553328,
'p': 0.4719626168224299,
'r': 0.09777347531461762},
{'a': 0.6726057906458798,
'f1': 0.030160226201696512,
'p': 0.07582938388625593,
'r': 0.018823529411764704},
{'a': 0.6951011455032903,
'f1': 0.041379310344827586,
'p': 0.07356948228882834,
'r': 0.028784648187633263},
{'a': 0.6735945485519591,
'f1': 0.19630872483221476,
'p': 0.391304347826087,
'r': 0.13101903695408734},
{'a': 0.5642118076688983,
'f1': 0.06649282920469361,
'p': 0.13746630727762804,
'r': 0.043852106620808254},
{'a': 0.7490858249085824,
'f1': 0.06940566414040686,
'p': 0.21914357682619648,
'r': 0.04123222748815166},
{'a': 0.7741935483870968,
'f1': 0.03855421686746988,
'p': 0.07142857142857142,
'r': 0.026402640264026403},
{'a': 0.7309829059829059,
'f1': 0.04981132075471698,
'p': 0.09792284866468842,
'r': 0.03340080971659919},
{'a': 0.5174452986398581,
'f1': 0.022754491017964073,
'p': 0.054441260744985676,
'r': 0.014383043149129448},
{'a': 0.7026963657678781,
'f1': 0.06489675516224189,
'p': 0.1896551724137931,
'r': 0.03914590747330961},
{'a': 0.6183588317107093,
'f1': 0.26394849785407726,
'p': 0.5553047404063205,
'r': 0.17311752287121746},
{'a': 0.6987265415549598,
'f1': 0.12971926427879962,
'p': 0.2537878787878788,
'r': 0.08712613784135241}],
'thenation.com;2015': [{'a': 0.3026148768723026,
'f1': 0.028985507246376815,
'p': 0.04275286757038582,
'r': 0.021925133689839574},
{'a': 0.41273172437915356,
'f1': 0.03892386949055524,
'p': 0.035196687370600416,
'r': 0.04353393085787452},
{'a': 0.36717752234993617,
'f1': 0.03599221789883268,
'p': 0.038302277432712216,
'r': 0.03394495412844037},
{'a': 0.22437275985663083,
'f1': 0.026978417266187053,
'p': 0.06256517205422316,
'r': 0.017196904557179708},
{'a': 0.34198178305271876,
'f1': 0.039484286865431095,
'p': 0.051094890510948905,
'r': 0.03217334208798424},
{'a': 0.36038473098887885,
'f1': 0.0405770964833183,
'p': 0.04707112970711297,
'r': 0.03565768621236133},
{'a': 0.3626139817629179,
'f1': 0.03851444291609354,
'p': 0.043704474505723206,
'r': 0.03442622950819672},
{'a': 0.5534020618556701,
'f1': 0.2627637848876787,
'p': 0.1698943661971831,
'r': 0.5795795795795796},
{'a': 0.40534682080924855,
'f1': 0.023724792408066433,
'p': 0.020942408376963352,
'r': 0.027359781121751026},
{'a': 0.3602894902454374,
'f1': 0.03420427553444181,
'p': 0.037460978147762745,
'r': 0.03146853146853147},
{'a': 0.5136510874595095,
'f1': 0.007554296506137866,
'p': 0.004210526315789474,
'r': 0.03669724770642202},
{'a': 0.4053342113928219,
'f1': 0.05345911949685534,
'p': 0.05209397344228805,
'r': 0.05489773950484392},
{'a': 0.36949255545426923,
'f1': 0.04421925380009213,
'p': 0.05005213764337852,
'r': 0.039603960396039604},
{'a': 0.36,
'f1': 0.029608404966571158,
'p': 0.03225806451612903,
'r': 0.02736098852603707},
{'a': 0.3601761560239069,
'f1': 0.028653295128939826,
'p': 0.031446540880503145,
'r': 0.02631578947368421},
{'a': 0.5260067114093959,
'f1': 0.17758369723435227,
'p': 0.11359404096834265,
'r': 0.4066666666666667},
{'a': 0.2908348457350272,
'f1': 0.08112874779541446,
'p': 0.133076181292189,
'r': 0.05835095137420719},
{'a': 0.20452721143040617,
'f1': 0.028621838855126445,
'p': 0.07572614107883817,
'r': 0.017645636934977036},
{'a': 0.2786144578313253,
'f1': 0.04879432624113475,
'p': 0.08775510204081632,
'r': 0.03379174852652259},
{'a': 0.2536912751677852,
'f1': 0.028457314028956564,
'p': 0.05870236869207003,
'r': 0.018780889621087316},
{'a': 0.38382541720154045,
'f1': 0.05604719764011799,
'p': 0.0584016393442623,
'r': 0.05387523629489603},
{'a': 0.33649008582420836,
'f1': 0.041060735671514116,
'p': 0.04984423676012461,
'r': 0.03490909090909091},
{'a': 0.2452644041041831,
'f1': 0.029434153768079167,
'p': 0.05997931747673216,
'r': 0.019502353732347006},
{'a': 0.5146927871772039,
'f1': 0.09618573797678273,
'p': 0.057539682539682536,
'r': 0.29292929292929293},
{'a': 0.4216312056737589,
'f1': 0.07487237663074305,
'p': 0.06626506024096386,
'r': 0.08604954367666232}],
'www.cnn.com;2000': [{'a': 0.6975016880486158,
'f1': 0.06276150627615062,
'p': 0.2542372881355932,
'r': 0.03579952267303103},
{'a': 0.48680872653475393,
'f1': 0.08337109198006343,
'p': 0.5411764705882353,
'r': 0.045164457535591555},
{'a': 0.1582423894313613,
'f1': 0.043095004897159644,
'p': 0.616822429906542,
'r': 0.022327469553450607},
{'a': 0.7353485502776064,
'f1': 0.10438413361169104,
'p': 0.21929824561403508,
'r': 0.0684931506849315},
{'a': 0.7265787647467037,
'f1': 0.09216589861751151,
'p': 0.17391304347826086,
'r': 0.06269592476489028},
{'a': 0.4201058201058201,
'f1': 0.07013574660633483,
'p': 0.41333333333333333,
'r': 0.038318912237330034},
{'a': 0.5369891360579411,
'f1': 0.10945273631840798,
'p': 0.4330708661417323,
'r': 0.06264236902050115},
{'a': 0.5251612903225806,
'f1': 0.08305647840531563,
'p': 0.3448275862068966,
'r': 0.047214353163361665},
{'a': 0.5749613601236476,
'f1': 0.12513255567338283,
'p': 0.3881578947368421,
'r': 0.07458912768647281},
{'a': 0.5814676616915423,
'f1': 0.09906291834002678,
'p': 0.25874125874125875,
'r': 0.061258278145695365},
{'a': 0.5814814814814815,
'f1': 0.05042016806722689,
'p': 0.21238938053097345,
'r': 0.028605482717520857},
{'a': 0.5602035048049746,
'f1': 0.13555555555555557,
'p': 0.40131578947368424,
'r': 0.08155080213903744},
{'a': 0.6377277599142551,
'f1': 0.055865921787709494,
'p': 0.17391304347826086,
'r': 0.033277870216306155},
{'a': 0.5768222338751966,
'f1': 0.035842293906810034,
'p': 0.2542372881355932,
'r': 0.019280205655526992},
{'a': 0.4246520874751491,
'f1': 0.08821676118462508,
'p': 0.45454545454545453,
'r': 0.04884856943475227},
{'a': 0.7023809523809523,
'f1': 0.07063197026022304,
'p': 0.14615384615384616,
'r': 0.04656862745098039},
{'a': 0.5881294964028777,
'f1': 0.06784260515603799,
'p': 0.24509803921568626,
'r': 0.03937007874015748},
{'a': 0.8129251700680272,
'f1': 0.1887905604719764,
'p': 0.22377622377622378,
'r': 0.16326530612244897},
{'a': 0.5969162995594713,
'f1': 0.13064133016627077,
'p': 0.3691275167785235,
'r': 0.07936507936507936},
{'a': 0.6387096774193548,
'f1': 0.1540785498489426,
'p': 0.35172413793103446,
'r': 0.09864603481624758},
{'a': 0.7348066298342542,
'f1': 0.16157205240174669,
'p': 0.2781954887218045,
'r': 0.11384615384615385},
{'a': 0.46415981198589895,
'f1': 0.07128309572301425,
'p': 0.5932203389830508,
'r': 0.03791982665222102},
{'a': 0.4232686980609418,
'f1': 0.17577197149643706,
'p': 0.7816901408450704,
'r': 0.09901873327386262},
{'a': 0.4493260109835247,
'f1': 0.07698744769874476,
'p': 0.5257142857142857,
'r': 0.04153498871331828},
{'a': 0.5974892114554727,
'f1': 0.10471204188481675,
'p': 0.40540540540540543,
'r': 0.06012024048096192}],
'www.cnn.com;2005': [{'a': 0.6589861751152074,
'f1': 0.10483870967741936,
'p': 0.1270358306188925,
'r': 0.08924485125858124},
{'a': 0.6099148723084626,
'f1': 0.06929510155316607,
'p': 0.07591623036649214,
'r': 0.06373626373626373},
{'a': 0.5303442028985508,
'f1': 0.12341504649196958,
'p': 0.21220930232558138,
'r': 0.08700834326579261},
{'a': 0.5985948477751757,
'f1': 0.10261780104712041,
'p': 0.14540059347181009,
'r': 0.07928802588996764},
{'a': 0.6853685368536854,
'f1': 0.10625,
'p': 0.11038961038961038,
'r': 0.10240963855421686},
{'a': 0.541544885177453,
'f1': 0.0663265306122449,
'p': 0.11538461538461539,
'r': 0.046539379474940336},
{'a': 0.6502857142857142,
'f1': 0.11046511627906977,
'p': 0.11949685534591195,
'r': 0.10270270270270271},
{'a': 0.6775262286029817,
'f1': 0.10153846153846155,
'p': 0.10714285714285714,
'r': 0.09649122807017543},
{'a': 0.4853503184713376,
'f1': 0.050156739811912224,
'p': 0.10738255033557047,
'r': 0.032719836400818},
{'a': 0.7640704945992041,
'f1': 0.09978308026030369,
'p': 0.10043668122270742,
'r': 0.09913793103448276},
{'a': 0.5419847328244275,
'f1': 0.0520446096654275,
'p': 0.0924092409240924,
'r': 0.03622250970245795},
{'a': 0.6626442812172089,
'f1': 0.09052333804809054,
'p': 0.10289389067524116,
'r': 0.08080808080808081},
{'a': 0.6202409638554217,
'f1': 0.09006928406466513,
'p': 0.11854103343465046,
'r': 0.07262569832402235},
{'a': 0.5553925165077036,
'f1': 0.06481481481481481,
'p': 0.17721518987341772,
'r': 0.039660056657223795},
{'a': 0.5228055077452668,
'f1': 0.04970008568980291,
'p': 0.09602649006622517,
'r': 0.03352601156069364},
{'a': 0.6716804979253111,
'f1': 0.18741976893453147,
'p': 0.21220930232558138,
'r': 0.167816091954023},
{'a': 0.6220883534136546,
'f1': 0.07107601184600197,
'p': 0.14754098360655737,
'r': 0.04681404421326398},
{'a': 0.6048593350383632,
'f1': 0.12298959318826869,
'p': 0.19461077844311378,
'r': 0.08990318118948824},
{'a': 0.6574519230769231,
'f1': 0.12037037037037038,
'p': 0.12420382165605096,
'r': 0.11676646706586827},
{'a': 0.6969001148105626,
'f1': 0.0896551724137931,
'p': 0.07975460122699386,
'r': 0.10236220472440945},
{'a': 0.5483870967741935,
'f1': 0.06417112299465241,
'p': 0.11076923076923077,
'r': 0.0451693851944793},
{'a': 0.5350588235294118,
'f1': 0.08348794063079776,
'p': 0.13975155279503104,
'r': 0.05952380952380952},
{'a': 0.6160842508377214,
'f1': 0.14316239316239315,
'p': 0.1936416184971098,
'r': 0.1135593220338983},
{'a': 0.7133333333333334,
'f1': 0.1920668058455115,
'p': 0.22549019607843138,
'r': 0.16727272727272727},
{'a': 0.42135593220338985,
'f1': 0.0479643056330173,
'p': 0.13109756097560976,
'r': 0.029351535836177476}],
'www.cnn.com;2010': [{'a': 0.4620505992010652,
'f1': 0.09518477043673013,
'p': 0.14554794520547945,
'r': 0.07071547420965059},
{'a': 0.5088055130168453,
'f1': 0.1325219743069642,
'p': 0.11694510739856802,
'r': 0.15288611544461778},
{'a': 0.5087591240875913,
'f1': 0.16707920792079206,
'p': 0.23519163763066203,
'r': 0.1295585412667946},
{'a': 0.322131395700878,
'f1': 0.09462191670036392,
'p': 0.11470588235294117,
'r': 0.0805230557467309},
{'a': 0.6582081822889695,
'f1': 0.20863309352517986,
'p': 0.13679245283018868,
'r': 0.4393939393939394},
{'a': 0.596466759646676,
'f1': 0.16054158607350097,
'p': 0.1292834890965732,
'r': 0.21173469387755103},
{'a': 0.37534530386740333,
'f1': 0.11366976972072514,
'p': 0.08094905792044661,
'r': 0.19078947368421054},
{'a': 0.5278001611603546,
'f1': 0.1344165435745938,
'p': 0.15016501650165018,
'r': 0.12165775401069519},
{'a': 0.4939858979676483,
'f1': 0.1347517730496454,
'p': 0.15151515151515152,
'r': 0.12132822477650064},
{'a': 0.40793010752688175,
'f1': 0.08515057113187954,
'p': 0.13290113452188007,
'r': 0.06264323911382735},
{'a': 0.5744870651204282,
'f1': 0.12637362637362637,
'p': 0.09019607843137255,
'r': 0.21100917431192662},
{'a': 0.4285341679707877,
'f1': 0.08898128898128899,
'p': 0.0888704318936877,
'r': 0.0890924229808493},
{'a': 0.4728079911209767,
'f1': 0.16617905207723813,
'p': 0.16265750286368844,
'r': 0.16985645933014354},
{'a': 0.3703071672354949,
'f1': 0.12351543942992874,
'p': 0.15515771526001704,
'r': 0.10259301014656144},
{'a': 0.9046695123528651,
'f1': 0.13396004700352526,
'p': 0.13225058004640372,
'r': 0.1357142857142857},
{'a': 0.5461568173039784,
'f1': 0.19575633127994524,
'p': 0.1581858407079646,
'r': 0.25673249551166966},
{'a': 0.3697139303482587,
'f1': 0.07821737153251478,
'p': 0.10709838107098381,
'r': 0.06160458452722063},
{'a': 0.4607809847198642,
'f1': 0.12362030905077262,
'p': 0.11211211211211211,
'r': 0.13776137761377613},
{'a': 0.3501088139281828,
'f1': 0.06643220007815553,
'p': 0.14382402707275804,
'r': 0.043191056910569105},
{'a': 0.5057875155022736,
'f1': 0.10078977059044755,
'p': 0.07850029291154072,
'r': 0.1407563025210084},
{'a': 0.9509987282112665,
'f1': 0.07876230661040787,
'p': 0.12280701754385964,
'r': 0.057971014492753624},
{'a': 0.43664443664443664,
'f1': 0.10184667039731393,
'p': 0.14375987361769352,
'r': 0.07885615251299827},
{'a': 0.5566793088916983,
'f1': 0.15024232633279486,
'p': 0.1411229135053111,
'r': 0.16062176165803108},
{'a': 0.49600798403193613,
'f1': 0.16712479384277076,
'p': 0.21714285714285714,
'r': 0.1358355674709562},
{'a': 0.5428949983438225,
'f1': 0.31137724550898205,
'p': 0.2859761686526123,
'r': 0.3417305585980285}],
'www.cnn.com;2015': [{'a': 0.2393500219587176,
'f1': 0.0698174006444683,
'p': 0.06996770721205597,
'r': 0.06966773847802786},
{'a': 0.43268242548818087,
'f1': 0.08305647840531563,
'p': 0.06510416666666667,
'r': 0.11467889908256881},
{'a': 0.3201803833145434,
'f1': 0.08774583963691378,
'p': 0.05876393110435663,
'r': 0.17313432835820897},
{'a': 0.20908004778972522,
'f1': 0.14359637774902975,
'p': 0.094414516586334,
'r': 0.29972997299729975},
{'a': 0.4798185941043084,
'f1': 0.07425343018563357,
'p': 0.059050064184852376,
'r': 0.1},
{'a': 0.2786471479050984,
'f1': 0.07627666451195864,
'p': 0.06316916488222699,
'r': 0.09624796084828711},
{'a': 0.2809871003925967,
'f1': 0.0723589001447178,
'p': 0.05417118093174431,
'r': 0.10893246187363835},
{'a': 0.2601442741208296,
'f1': 0.05959885386819484,
'p': 0.05591397849462366,
'r': 0.0638036809815951},
{'a': 0.36173330542181287,
'f1': 0.1374823196605375,
'p': 0.09299655568312284,
'r': 0.2635574837310195},
{'a': 0.1120754716981132,
'f1': 0.03959183673469388,
'p': 0.021050347222222224,
'r': 0.3321917808219178},
{'a': 0.4793038570084666,
'f1': 0.07364016736401674,
'p': 0.057068741893644616,
'r': 0.10377358490566038},
{'a': 0.24266666666666667,
'f1': 0.0718954248366013,
'p': 0.042746113989637305,
'r': 0.22602739726027396},
{'a': 0.31946812137742925,
'f1': 0.07076350093109869,
'p': 0.062193126022913256,
'r': 0.08207343412526998},
{'a': 0.19545938275984392,
'f1': 0.05578684429641964,
'p': 0.07235421166306695,
'r': 0.04539295392953929},
{'a': 0.35155987923515597,
'f1': 0.06843373493975903,
'p': 0.08482676224611709,
'r': 0.05735056542810985},
{'a': 0.24536585365853658,
'f1': 0.06412583182093164,
'p': 0.05656350053361793,
'r': 0.07402234636871509},
{'a': 0.1947297900848593,
'f1': 0.11574301128003922,
'p': 0.07091346153846154,
'r': 0.31466666666666665},
{'a': 0.29908814589665655,
'f1': 0.07240547063555913,
'p': 0.04923413566739606,
'r': 0.13677811550151975},
{'a': 0.3154084798345398,
'f1': 0.07282913165266107,
'p': 0.06341463414634146,
'r': 0.08552631578947369},
{'a': 0.2191780821917808,
'f1': 0.14133114215283482,
'p': 0.09980657640232109,
'r': 0.24202626641651032},
{'a': 0.578062449959968,
'f1': 0.08665511265164644,
'p': 0.06443298969072164,
'r': 0.13227513227513227},
{'a': 0.23074514715090796,
'f1': 0.06257153758107592,
'p': 0.07670720299345182,
'r': 0.05283505154639175},
{'a': 0.32028301886792454,
'f1': 0.06123778501628665,
'p': 0.04400749063670412,
'r': 0.1006423982869379},
{'a': 0.21709531013615735,
'f1': 0.06588447653429604,
'p': 0.07344064386317907,
'r': 0.05973813420621931},
{'a': 0.344954128440367,
'f1': 0.07272727272727274,
'p': 0.05228758169934641,
'r': 0.11940298507462686}],
'www.esquire.com;2000': [{'a': 0.6353741496598639,
'f1': 0.007407407407407407,
'p': 0.06896551724137931,
'r': 0.003913894324853229},
{'a': 0.5390243902439025,
'f1': 0.007874015748031496,
'p': 0.10344827586206896,
'r': 0.004092769440654843},
{'a': 0.6230498238550579,
'f1': 0.1380897583429229,
'p': 0.6976744186046512,
'r': 0.07662835249042145},
{'a': 0.7370517928286853,
'f1': 0.2611940298507463,
'p': 0.7216494845360825,
'r': 0.15945330296127563},
{'a': 0.5414043583535109,
'f1': 0.13987284287011806,
'p': 0.7549019607843137,
'r': 0.07707707707707707},
{'a': 0.771884984025559,
'f1': 0.48633093525179855,
'p': 0.8622448979591837,
'r': 0.33867735470941884},
{'a': 0.7477638640429338,
'f1': 0.07032967032967033,
'p': 0.26666666666666666,
'r': 0.04050632911392405},
{'a': 0.545101842870999,
'f1': 0.1857638888888889,
'p': 0.8106060606060606,
'r': 0.10490196078431373},
{'a': 0.719214753123141,
'f1': 0.148014440433213,
'p': 0.6119402985074627,
'r': 0.08418891170431211},
{'a': 0.47564038171772977,
'f1': 0.0019120458891013386,
'p': 0.034482758620689655,
'r': 0.0009832841691248771},
{'a': 0.6601941747572816,
'f1': 0.00437636761487965,
'p': 0.034482758620689655,
'r': 0.002336448598130841},
{'a': 0.6513815402704292,
'f1': 0.0420032310177706,
'p': 0.325,
'r': 0.022452504317789293},
{'a': 0.551567239635996,
'f1': 0.09211873080859775,
'p': 0.6521739130434783,
'r': 0.04955947136563876},
{'a': 0.6968822170900693,
'f1': 0.10865874363327674,
'p': 0.5614035087719298,
'r': 0.06015037593984962},
{'a': 0.6686358754027927,
'f1': 0.1870882740447958,
'p': 0.7244897959183674,
'r': 0.10741301059001512},
{'a': 0.6909937888198758,
'f1': 0.005,
'p': 0.034482758620689655,
'r': 0.0026954177897574125},
{'a': 0.4548238897396631,
'f1': 0.003731343283582089,
'p': 0.06896551724137931,
'r': 0.0019175455417066154},
{'a': 0.6367713004484304,
'f1': 0.11232876712328767,
'p': 0.6029411764705882,
'r': 0.061933534743202415},
{'a': 0.7156123041207196,
'f1': 0.0040650406504065045,
'p': 0.034482758620689655,
'r': 0.0021598272138228943},
{'a': 0.8236421725239617,
'f1': 0.08000000000000002,
'p': 0.2,
'r': 0.05},
{'a': 0.6491228070175439,
'f1': 0.01234567901234568,
'p': 0.10344827586206896,
'r': 0.006564551422319475},
{'a': 0.5155889145496536,
'f1': 0.007100591715976331,
'p': 0.10344827586206896,
'r': 0.003676470588235294},
{'a': 0.6307439824945296,
'f1': 0.005891016200294551,
'p': 0.06896551724137931,
'r': 0.003076923076923077},
{'a': 0.3333333333333333,
'f1': 0.125,
'p': 0.07692307692307693,
'r': 0.3333333333333333},
{'a': 0.6996527777777778,
'f1': 0.11884550084889643,
'p': 0.5645161290322581,
'r': 0.06641366223908918}],
'www.esquire.com;2005': [{'a': 0.5685774946921444,
'f1': 0.0846846846846847,
'p': 0.5802469135802469,
'r': 0.0456754130223518},
{'a': 0.6064377682403433,
'f1': 0.20191470844212359,
'p': 0.7682119205298014,
'r': 0.11623246492985972},
{'a': 0.7042726836293807,
'f1': 0.2752941176470588,
'p': 0.7697368421052632,
'r': 0.167621776504298},
{'a': 0.61492673992674,
'f1': 0.10816542948038176,
'p': 0.5930232558139535,
'r': 0.059509918319719954},
{'a': 0.5693842150910667,
'f1': 0.03872216844143272,
'p': 0.37037037037037035,
'r': 0.020429009193054137},
{'a': 0.6831194471865746,
'f1': 0.08806818181818181,
'p': 0.4626865671641791,
'r': 0.04866562009419152},
{'a': 0.3933649289099526,
'f1': 0.014218009478672985,
'p': 0.057971014492753624,
'r': 0.008102633355840648},
{'a': 0.632741617357002,
'f1': 0.22738589211618257,
'p': 0.7965116279069767,
'r': 0.13262342691190707},
{'a': 0.5779135748581405,
'f1': 0.06750241080038571,
'p': 0.5,
'r': 0.03619441571871768},
{'a': 0.5944444444444444,
'f1': 0.0022779043280182236,
'p': 0.027777777777777776,
'r': 0.0011876484560570072},
{'a': 0.602277520033741,
'f1': 0.06910167818361303,
'p': 0.5147058823529411,
'r': 0.037037037037037035},
{'a': 0.7123501823866597,
'f1': 0.0547945205479452,
'p': 0.3076923076923077,
'r': 0.03007518796992481},
{'a': 0.7563070316693505,
'f1': 0.11328124999999999,
'p': 0.4461538461538462,
'r': 0.06487695749440715},
{'a': 0.5751484696208314,
'f1': 0.05679513184584179,
'p': 0.4444444444444444,
'r': 0.030335861321776816},
{'a': 0.5896980461811723,
'f1': 0.07042253521126761,
'p': 0.5,
'r': 0.03787878787878788},
{'a': 0.6205882352941177,
'f1': 0.004410143329658214,
'p': 0.05405405405405406,
'r': 0.0022988505747126436},
{'a': 0.6353200883002208,
'f1': 0.25719424460431656,
'p': 0.8125,
'r': 0.1527777777777778},
{'a': 0.583790628957366,
'f1': 0.1558219178082192,
'p': 0.7222222222222222,
'r': 0.0873320537428023},
{'a': 0.5907335907335908,
'f1': 0.14973262032085563,
'p': 0.7058823529411765,
'r': 0.08374875373878365},
{'a': 0.5548172757475083,
'f1': 0.2592137592137592,
'p': 0.854251012145749,
'r': 0.15278783490224476},
{'a': 0.6462619167084797,
'f1': 0,
'p': 0.0,
'r': 0.0},
{'a': 0.580115036976171,
'f1': 0.13682432432432434,
'p': 0.6982758620689655,
'r': 0.07584269662921349},
{'a': 0.644955300127714,
'f1': 0.34330708661417325,
'p': 0.8755020080321285,
'r': 0.21351616062683643},
{'a': 0.5629310344827586,
'f1': 0.024999999999999998,
'p': 0.2708333333333333,
'r': 0.01310483870967742},
{'a': 0.5370442963543708,
'f1': 0.10598031794095382,
'p': 0.6730769230769231,
'r': 0.05751848808545604}],
'www.esquire.com;2010': [{'a': 0.5196390658174098,
'f1': 0.041313559322033906,
'p': 0.03373702422145329,
'r': 0.05327868852459016},
{'a': 0.36376811594202896,
'f1': 0.04058694973462378,
'p': 0.06403940886699508,
'r': 0.029707495429616086},
{'a': 0.5881372875708097,
'f1': 0.0462962962962963,
'p': 0.02895752895752896,
'r': 0.11538461538461539},
{'a': 0.46607431340872374,
'f1': 0.04156597390043499,
'p': 0.04232283464566929,
'r': 0.04083570750237417},
{'a': 0.44330708661417323,
'f1': 0.04416403785488958,
'p': 0.0475266731328807,
'r': 0.041245791245791245},
{'a': 0.5204985241062644,
'f1': 0.10962241169305725,
'p': 0.08637236084452975,
'r': 0.15},
{'a': 0.5460572226099093,
'f1': 0.056562726613488025,
'p': 0.03931451612903226,
'r': 0.10077519379844961},
{'a': 0.46049743964886614,
'f1': 0.15858528237307473,
'p': 0.14404145077720207,
'r': 0.17639593908629442},
{'a': 0.4706524706524707,
'f1': 0.03867129400099157,
'p': 0.038883349950149554,
'r': 0.038461538461538464},
{'a': 0.44309338521400776,
'f1': 0.05136702568351284,
'p': 0.060546875,
'r': 0.04460431654676259},
{'a': 0.5069740962140621,
'f1': 0.05147864184008763,
'p': 0.04549854791868345,
'r': 0.05926860025220681},
{'a': 0.4813753581661891,
'f1': 0.04029692470837752,
'p': 0.03811434302908726,
'r': 0.04274465691788527},
{'a': 0.42113600362072867,
'f1': 0.04836309523809524,
'p': 0.0634765625,
'r': 0.0390625},
{'a': 0.5644896619625861,
'f1': 0.059532246633593196,
'p': 0.041749502982107355,
'r': 0.1037037037037037},
{'a': 0.4397984886649874,
'f1': 0.04055220017256255,
'p': 0.04667328699106256,
'r': 0.03585049580472922},
{'a': 0.5776576576576576,
'f1': 0.06389776357827476,
'p': 0.039177277179236046,
'r': 0.17316017316017315},
{'a': 0.5655487804878049,
'f1': 0.04362416107382551,
'p': 0.02634245187436677,
'r': 0.12682926829268293},
{'a': 0.5270663562281723,
'f1': 0.21611191509889047,
'p': 0.18887015177065766,
'r': 0.2525366403607666},
{'a': 0.492661312655774,
'f1': 0.17028985507246377,
'p': 0.16666666666666666,
'r': 0.17407407407407408},
{'a': 0.42766011737688187,
'f1': 0.12417024599765715,
'p': 0.14943609022556392,
'r': 0.1062124248496994},
{'a': 0.5714788732394366,
'f1': 0.15661815661815665,
'p': 0.1050185873605948,
'r': 0.3079019073569482},
{'a': 0.5559246954595792,
'f1': 0.12125639152666179,
'p': 0.08003857280617165,
'r': 0.25},
{'a': 0.34931096464949074,
'f1': 0.09449694274596998,
'p': 0.1541251133272892,
'r': 0.06813627254509018},
{'a': 0.4394743492544857,
'f1': 0.04231433506044904,
'p': 0.04909819639278557,
'r': 0.03717754172989378},
{'a': 0.39154243991481597,
'f1': 0.04122722914669223,
'p': 0.048863636363636366,
'r': 0.03565505804311774}],
'www.esquire.com;2015': [{'a': 0.2815464556418935,
'f1': 0.008421606684650305,
'p': 0.004259567387687188,
'r': 0.367816091954023},
{'a': 0.2892430278884462,
'f1': 0.02205597833096866,
'p': 0.011347047113470472,
'r': 0.3922018348623853},
{'a': 0.27834027252081756,
'f1': 0.00883748131782442,
'p': 0.004470742932281394,
'r': 0.37988826815642457},
{'a': 0.4793280354089126,
'f1': 0.08194395175594182,
'p': 0.04421052631578947,
'r': 0.559322033898305},
{'a': 0.30662051828186015,
'f1': 0.06688164337752299,
'p': 0.03728858702889865,
'r': 0.32407407407407407},
{'a': 0.31959157626037016,
'f1': 0.09883360567983322,
'p': 0.058210540289393334,
'r': 0.32711674748228275},
{'a': 0.29098680884313094,
'f1': 0.042339210330270674,
'p': 0.022264298772525462,
'r': 0.4305555555555556},
{'a': 0.2880916462379785,
'f1': 0.024168012924071083,
'p': 0.012405466365928089,
'r': 0.46633416458852867},
{'a': 0.3802784401199908,
'f1': 0.03705031672044939,
'p': 0.02107409925220938,
'r': 0.15316205533596838},
{'a': 0.3299702380952381,
'f1': 0.18475466232120225,
'p': 0.16622141135075258,
'r': 0.20793935441799805},
{'a': 0.29014989293361887,
'f1': 0.028787975288198205,
'p': 0.014992702666843572,
'r': 0.36044657097288674},
{'a': 0.37776770195722814,
'f1': 0.045001159823706796,
'p': 0.0264521407144805,
'r': 0.15062111801242237},
{'a': 0.2895745675549322,
'f1': 0.03541957598070331,
'p': 0.018333552372190826,
'r': 0.5205223880597015},
{'a': 0.3093316688216149,
'f1': 0.06966548555642303,
'p': 0.03832430289414563,
'r': 0.3823335530652604},
{'a': 0.302854026540629,
'f1': 0.053553800592300096,
'p': 0.02870940001323014,
'r': 0.39780018331805683},
{'a': 0.3895080401209998,
'f1': 0.03156964263164541,
'p': 0.01697216564833673,
'r': 0.22563176895306858},
{'a': 0.28954123302375506,
'f1': 0.027346495431601815,
'p': 0.014256212111118513,
'r': 0.334375},
{'a': 0.2883014252787055,
'f1': 0.015614834092387767,
'p': 0.007968656617305265,
'r': 0.3858520900321543},
{'a': 0.388417853143497,
'f1': 0.027968471904398676,
'p': 0.014970059880239521,
'r': 0.21235521235521235},
{'a': 0.3116512412421071,
'f1': 0.08095623051160643,
'p': 0.046457684405858575,
'r': 0.3144908030506954},
{'a': 0.28942921517085995,
'f1': 0.017012987012987014,
'p': 0.00868067059836989,
'r': 0.42394822006472493},
{'a': 0.3640125570776256,
'f1': 0.050489987217724755,
'p': 0.03224928561709076,
'r': 0.11623344776851398},
{'a': 0.31888070190382894,
'f1': 0.09146687876377685,
'p': 0.053506148222000664,
'r': 0.3148220570981619},
{'a': 0.3252278340856827,
'f1': 0.09925530100619635,
'p': 0.05781839856944169,
'r': 0.3503210272873194},
{'a': 0.38909682668836454,
'f1': 0.02009919081179849,
'p': 0.010497614178595774,
'r': 0.23547400611620795},
{'a': 0.28694673668417103,
'f1': 0.019471308833010962,
'p': 0.010009280127270316,
'r': 0.3561320754716981}],
'www.forbes.com;2000': [{'a': 0.4138162307176392,
'f1': 0.15473887814313345,
'p': 0.21978021978021978,
'r': 0.11940298507462686},
{'a': 0.4181159420289855,
'f1': 0.15916230366492148,
'p': 0.19895287958115182,
'r': 0.13263525305410123},
{'a': 0.1745580808080808,
'f1': 0.032556418793932666,
'p': 0.1301775147928994,
'r': 0.018604651162790697},
{'a': 0.42283298097251587,
'f1': 0.24236817761332102,
'p': 0.3119047619047619,
'r': 0.19818456883509833},
{'a': 0.5432098765432098,
'f1': 0.01520912547528517,
'p': 0.012618296529968454,
'r': 0.019138755980861243},
{'a': 0.4189278623428193,
'f1': 0.12200000000000001,
'p': 0.17039106145251395,
'r': 0.09501557632398754},
{'a': 0.2661246612466125,
'f1': 0.028694404591104734,
'p': 0.0641025641025641,
'r': 0.018484288354898338},
{'a': 0.44874715261959,
'f1': 0.0215633423180593,
'p': 0.026058631921824105,
'r': 0.01839080459770115},
{'a': 0.3072916666666667,
'f1': 0.13971539456662355,
'p': 0.26932668329177056,
'r': 0.0943231441048035},
{'a': 0.23922518159806294,
'f1': 0.03678724708767627,
'p': 0.45454545454545453,
'r': 0.019169329073482427},
{'a': 0.4340974212034384,
'f1': 0.09195402298850575,
'p': 0.11764705882352941,
'r': 0.07547169811320754},
{'a': 0.18505613627564846,
'f1': 0.01773215118992067,
'p': 0.08296943231441048,
'r': 0.009926854754440962},
{'a': 0.46938775510204084,
'f1': 0.09972299168975068,
'p': 0.11076923076923077,
'r': 0.0906801007556675},
{'a': 0.4826568265682657,
'f1': 0.15643802647412755,
'p': 0.18361581920903955,
'r': 0.13626834381551362},
{'a': 0.550098231827112,
'f1': 0.1642335766423358,
'p': 0.12931034482758622,
'r': 0.225},
{'a': 0.3291500285225328,
'f1': 0.06518282988871224,
'p': 0.12538226299694188,
'r': 0.04403866809881848},
{'a': 0.30526315789473685,
'f1': 0.0958904109589041,
'p': 0.18716577540106952,
'r': 0.06445672191528545},
{'a': 0.33564198188598826,
'f1': 0.1370242214532872,
'p': 0.2551546391752577,
'r': 0.09366130558183539},
{'a': 0.37333333333333335,
'f1': 0.09298245614035089,
'p': 0.16012084592145015,
'r': 0.06551297898640297},
{'a': 0.6410453497309762,
'f1': 0.02505219206680585,
'p': 0.0169971671388102,
'r': 0.047619047619047616},
{'a': 0.7522935779816514,
'f1': 0.2857142857142857,
'p': 0.48214285714285715,
'r': 0.20300751879699247},
{'a': 0.46968590211833455,
'f1': 0.13157894736842105,
'p': 0.15714285714285714,
'r': 0.11316872427983539},
{'a': 0.35719612229679343,
'f1': 0.03794642857142857,
'p': 0.07488986784140969,
'r': 0.025411061285500747},
{'a': 0.3987341772151899,
'f1': 0.09714889123548046,
'p': 0.13068181818181818,
'r': 0.0773109243697479},
{'a': 0.21024679787566386,
'f1': 0.10608203677510608,
'p': 0.3401360544217687,
'r': 0.06284038542103058},
{'a': 0.3528198074277854,
'f1': 0.03881511746680286,
'p': 0.06188925081433225,
'r': 0.028273809523809524},
{'a': 0.44759825327510916,
'f1': 0.12254335260115606,
'p': 0.14285714285714285,
'r': 0.10728744939271255},
{'a': 0.43105320304017375,
'f1': 0.32036316472114135,
'p': 0.8606271777003485,
'r': 0.19681274900398407}],
'www.forbes.com;2005': [{'a': 0.863003663003663,
'f1': 0.06965174129353233,
'p': 0.06306306306306306,
'r': 0.07777777777777778},
{'a': 0.6520674646354734,
'f1': 0.03617181612660136,
'p': 0.09836065573770492,
'r': 0.0221606648199446},
{'a': 0.8760999648011264,
'f1': 0.6097560975609757,
'p': 0.507380073800738,
'r': 0.7638888888888888},
{'a': 0.7490438364224772,
'f1': 0.08378088077336197,
'p': 0.10427807486631016,
'r': 0.07001795332136446},
{'a': 0.7568866111467009,
'f1': 0.045283018867924525,
'p': 0.09137055837563451,
'r': 0.030100334448160536},
{'a': 0.8789423383243071,
'f1': 0.7538860103626942,
'p': 0.6598639455782312,
'r': 0.879154078549849},
{'a': 0.8394975575715282,
'f1': 0.08366533864541834,
'p': 0.08936170212765958,
'r': 0.07865168539325842},
{'a': 0.8722084367245657,
'f1': 0.7193460490463215,
'p': 0.6219081272084805,
'r': 0.8529886914378029},
{'a': 0.8945892907204934,
'f1': 0.8368055555555556,
'p': 0.7761674718196457,
'r': 0.9077212806026366},
{'a': 0.8564867967853043,
'f1': 0.03598971722365038,
'p': 0.021875,
'r': 0.10144927536231885},
{'a': 0.8617436564736499,
'f1': 0.013921113689095127,
'p': 0.016853932584269662,
'r': 0.011857707509881422},
{'a': 0.871244635193133,
'f1': 0.5588235294117647,
'p': 0.4318181818181818,
'r': 0.7916666666666666},
{'a': 0.8811320754716981,
'f1': 0.4596912521440823,
'p': 0.3358395989974937,
'r': 0.7282608695652174},
{'a': 0.734319716950788,
'f1': 0.030516431924882632,
'p': 0.058823529411764705,
'r': 0.020602218700475437},
{'a': 0.7574793125397836,
'f1': 0.07299270072992702,
'p': 0.15228426395939088,
'r': 0.048},
{'a': 0.8661327231121282,
'f1': 0.022284122562674095,
'p': 0.022988505747126436,
'r': 0.021621621621621623},
{'a': 0.9159856162144492,
'f1': 0.8186309103740296,
'p': 0.7806191117092867,
'r': 0.8605341246290801},
{'a': 0.9171993911719939,
'f1': 0.8508771929824561,
'p': 0.8177028451001054,
'r': 0.8868571428571429},
{'a': 0.7154542798714578,
'f1': 0.06346153846153846,
'p': 0.15137614678899083,
'r': 0.040145985401459854},
{'a': 0.8875379939209727,
'f1': 0.7888127853881278,
'p': 0.7145811789038262,
'r': 0.8802547770700637},
{'a': 0.6865747830525778,
'f1': 0.06259541984732825,
'p': 0.10761154855643044,
'r': 0.04413347685683531},
{'a': 0.7896095514682155,
'f1': 0.05507246376811594,
'p': 0.0945273631840796,
'r': 0.03885480572597137},
{'a': 0.7582174462705437,
'f1': 0.056720098643649804,
'p': 0.10599078341013825,
'r': 0.03872053872053872},
{'a': 0.8942652329749103,
'f1': 0.855442772138607,
'p': 0.8179384203480589,
'r': 0.896551724137931},
{'a': 0.6976360637713029,
'f1': 0.04844290657439446,
'p': 0.12903225806451613,
'r': 0.029818956336528223}],
'www.forbes.com;2010': [{'a': 0.7487266553480475,
'f1': 0.19565217391304346,
'p': 0.1875,
'r': 0.20454545454545456},
{'a': 0.4982834722903384,
'f1': 0.12188841201716738,
'p': 0.33490566037735847,
'r': 0.07450157397691501},
{'a': 0.6392572944297082,
'f1': 0.20234604105571846,
'p': 0.3,
'r': 0.15265486725663716},
{'a': 0.6356329537843268,
'f1': 0.1782477341389728,
'p': 0.3224043715846995,
'r': 0.12317327766179541},
{'a': 0.4847250509164969,
'f1': 0.13504273504273506,
'p': 0.36574074074074076,
'r': 0.08280922431865828},
{'a': 0.6084415584415584,
'f1': 0.15189873417721517,
'p': 0.27979274611398963,
'r': 0.10424710424710425},
{'a': 0.6652334152334153,
'f1': 0.24200278164116829,
'p': 0.3,
'r': 0.20279720279720279},
{'a': 0.5815642458100558,
'f1': 0.27069133398247325,
'p': 0.47278911564625853,
'r': 0.18963165075034105},
{'a': 0.574251497005988,
'f1': 0.16056670602125148,
'p': 0.2982456140350877,
'r': 0.1098546042003231},
{'a': 0.6199395770392749,
'f1': 0.11533052039381152,
'p': 0.16666666666666666,
'r': 0.08817204301075268},
{'a': 0.540570789031897,
'f1': 0.20981713185755535,
'p': 0.3879003558718861,
'r': 0.1437994722955145},
{'a': 0.4894878706199461,
'f1': 0.09377990430622009,
'p': 0.21973094170403587,
'r': 0.05961070559610705},
{'a': 0.6045602605863192,
'f1': 0.15577190542420027,
'p': 0.2545454545454545,
'r': 0.11222444889779559},
{'a': 0.5645539906103286,
'f1': 0.13317757009345796,
'p': 0.27941176470588236,
'r': 0.08742331288343558},
{'a': 0.6227217496962333,
'f1': 0.2736842105263158,
'p': 0.375,
'r': 0.2154696132596685},
{'a': 0.4935866983372922,
'f1': 0.05328596802841917,
'p': 0.12345679012345678,
'r': 0.03397508493771234},
{'a': 0.6128391793514228,
'f1': 0.14348462664714495,
'p': 0.23557692307692307,
'r': 0.1031578947368421},
{'a': 0.6029911624745071,
'f1': 0.13864306784660768,
'p': 0.24102564102564103,
'r': 0.09730848861283643},
{'a': 0.6577726218097448,
'f1': 0.25692695214105793,
'p': 0.31875,
'r': 0.21518987341772153},
{'a': 0.6054087277197295,
'f1': 0.17054263565891473,
'p': 0.24,
'r': 0.13226452905811623},
{'a': 0.6,
'f1': 0.211734693877551,
'p': 0.386046511627907,
'r': 0.14586994727592267},
{'a': 0.6530856800479329,
'f1': 0.030150753768844216,
'p': 0.017892644135188866,
'r': 0.09574468085106383},
{'a': 0.670219064535228,
'f1': 0.060708263069139956,
'p': 0.045454545454545456,
'r': 0.09137055837563451},
{'a': 0.5246073298429319,
'f1': 0.13523
[928075 chars truncated]
[About 18862 more lines. Double-click to unfold]
>>> for trimmed in trimmed_results:
... pass
>>> packages = [os.path.join(f,filename)
... for f in listsubdir(['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests'])
... for filename in os.listdir(f)
... if filename == "results.pkl"]
...
... def trim_results(domain_path):
... domain_results = pickle.load(open(domain_path))
... part = {str(val['domain']+';'+val['year']):[] for domainyear in domain_results for key,val, in domainyear.items()}
...
... for domainyear in domain_results:
... for key,val in domainyear.items():
... part[str(val['domain']+";"+val['year'])].append(val['measurements'])
...
... domain_path = os.path.join(*(os.path.split(domain_path)[:-1]))
...
... trimmed_path = os.path.join(domain_path,'trimmed.pkl')
...
... pickle.dump(part,open(trimmed_path,'wb'))
...
... return part
...
... trimmed_results = []
... for pack in packages:
... trimmed_results.append([os.path.join(*(os.path.split(pack)[:-1])),trim_results(pack)])
>>> trimmed_results[0]
21: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter',
{'entertainment.msn.com;2000': [{'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
{'a': 0.9224137931034483,
'f1': 0.8266199649737302,
'p': 0.8973384030418251,
'r': 0.7662337662337663},
{'a': 0.933852140077821,
'f1': 0.9098939929328622,
'p': 0.9501845018450185,
'r': 0.8728813559322034},
{'a': 0.8989405052974735,
'f1': 0.7847222222222223,
'p': 0.7174603174603175,
'r': 0.8659003831417624},
{'a': 0.927536231884058,
'f1': 0.8558758314855874,
'p': 0.8577777777777778,
'r': 0.8539823008849557},
{'a': 0.9419152276295133,
'f1': 0.87987012987013,
'p': 0.9093959731543624,
'r': 0.8522012578616353},
{'a': 0.9335180055401662,
'f1': 0.916083916083916,
'p': 0.9509981851179673,
'r': 0.8836424957841484},
{'a': 0.8910472972972973,
'f1': 0.7425149700598803,
'p': 0.6813186813186813,
'r': 0.8157894736842105},
{'a': 0.894695170229612,
'f1': 0.8129395218002814,
'p': 0.774798927613941,
'r': 0.8550295857988166},
{'a': 0.8803122289679098,
'f1': 0.676056338028169,
'p': 0.6075949367088608,
'r': 0.7619047619047619},
{'a': 0.9037656903765691,
'f1': 0.8804159445407279,
'p': 0.8581081081081081,
'r': 0.9039145907473309},
{'a': 0.9509632224168126,
'f1': 0.9087947882736157,
'p': 0.9,
'r': 0.9177631578947368},
{'a': 0.9333680374804789,
'f1': 0.9330543933054394,
'p': 0.970620239390642,
'r': 0.8982880161127895},
{'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
{'a': 0.9472981987991995,
'f1': 0.9090909090909091,
'p': 0.9360189573459715,
'r': 0.883668903803132},
{'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
{'a': 0.9454148471615721,
'f1': 0.8898678414096917,
'p': 0.9181818181818182,
'r': 0.8632478632478633},
{'a': 0.8812227074235808,
'f1': 0.7301587301587301,
'p': 0.6789667896678967,
'r': 0.7896995708154506},
{'a': 0.8959484346224678,
'f1': 0.7064935064935064,
'p': 0.6507177033492823,
'r': 0.7727272727272727},
{'a': 0.889644746787604,
'f1': 0.7972222222222222,
'p': 0.7572559366754618,
'r': 0.841642228739003},
{'a': 0.9303030303030303,
'f1': 0.8993435448577681,
'p': 0.9383561643835616,
'r': 0.8634453781512605},
{'a': 0.9379900213827512,
'f1': 0.9186155285313378,
'p': 0.9478764478764479,
'r': 0.8911070780399274},
{'a': 0.9497374343585896,
'f1': 0.9130998702983139,
'p': 0.9263157894736842,
'r': 0.9002557544757033},
{'a': 0.9401555954518253,
'f1': 0.9159663865546219,
'p': 0.9527972027972028,
'r': 0.8818770226537217},
{'a': 0.9470013947001394,
'f1': 0.9573273441886581,
'p': 0.9567901234567902,
'r': 0.9578651685393258}],
'entertainment.msn.com;2005': [{'a': 0.6906686260102866,
'f1': 0.6982078853046595,
'p': 0.5553021664766249,
'r': 0.9401544401544402},
{'a': 0.5468451242829828,
'f1': 0.21000000000000002,
'p': 0.12401574803149606,
'r': 0.6847826086956522},
{'a': 0.49862825788751713,
'f1': 0.23455497382198953,
'p': 0.14267515923566879,
'r': 0.6588235294117647},
{'a': 0.6598006644518273,
'f1': 0.580327868852459,
'p': 0.4270205066344994,
'r': 0.9053708439897699},
{'a': 0.6897289586305279,
'f1': 0.7030716723549487,
'p': 0.5640744797371303,
'r': 0.9329710144927537},
{'a': 0.6415343915343915,
'f1': 0.5893939393939394,
'p': 0.44457142857142856,
'r': 0.8741573033707866},
{'a': 0.6283185840707964,
'f1': 0.5948553054662379,
'p': 0.44417767106842737,
'r': 0.9002433090024331},
{'a': 0.7079343365253078,
'f1': 0.7292327203551046,
'p': 0.5958549222797928,
'r': 0.9395424836601307},
{'a': 0.6803051317614425,
'f1': 0.6069906223358909,
'p': 0.4587628865979381,
'r': 0.8967254408060453},
{'a': 0.8139684583199227,
'f1': 0.8720106288751107,
'p': 0.7942718838241226,
'r': 0.9666175748649976},
{'a': 0.7660757733750434,
'f1': 0.8402563493947306,
'p': 0.7378074197582326,
'r': 0.9757442116868799},
{'a': 0.6724023825281271,
'f1': 0.6583850931677019,
'p': 0.5107066381156317,
'r': 0.9262135922330097},
{'a': 0.6503153468815698,
'f1': 0.6453447050461976,
'p': 0.49780701754385964,
'r': 0.9171717171717172},
{'a': 0.7622252131000449,
'f1': 0.8039940828402367,
'p': 0.7003865979381443,
'r': 0.9435763888888888},
{'a': 0.5560640732265446,
'f1': 0.4203187250996016,
'p': 0.2779973649538867,
'r': 0.8612244897959184},
{'a': 0.6134852801519468,
'f1': 0.36899224806201547,
'p': 0.2389558232931727,
'r': 0.8095238095238095},
{'a': 0.6816109422492401,
'f1': 0.6823351023502654,
'p': 0.5357142857142857,
'r': 0.9394572025052192},
{'a': 0.5220125786163522,
'f1': 0.27790973871733965,
'p': 0.16931982633863965,
'r': 0.7748344370860927},
{'a': 0.6473509933774835,
'f1': 0.5988700564971752,
'p': 0.4428969359331476,
'r': 0.9244186046511628},
{'a': 0.662015503875969,
'f1': 0.6466774716369531,
'p': 0.4962686567164179,
'r': 0.9279069767441861},
{'a': 0.5902621722846442,
'f1': 0.3572267920094007,
'p': 0.22926093514328807,
'r': 0.8085106382978723},
{'a': 0.5920763022743947,
'f1': 0.540495867768595,
'p': 0.38698224852071006,
'r': 0.8958904109589041},
{'a': 0.6629581151832461,
'f1': 0.57190357439734,
'p': 0.4226044226044226,
'r': 0.884318766066838},
{'a': 0.7147041593438781,
'f1': 0.7240793201133144,
'p': 0.5889400921658986,
'r': 0.9397058823529412},
{'a': 0.6313432835820896,
'f1': 0.6085578446909667,
'p': 0.4544378698224852,
'r': 0.920863309352518},
{'a': 0.5412639405204461,
'f1': 0.25392986698911735,
'p': 0.15306122448979592,
'r': 0.7446808510638298},
{'a': 0.6824005394470668,
'f1': 0.6713189113747383,
'p': 0.5320796460176991,
'r': 0.9092627599243857}],
'entertainment.msn.com;2010': [{'a': 0.3428857047650783,
'f1': 0.1658206429780034,
'p': 0.09201877934272301,
'r': 0.8376068376068376},
{'a': 0.45119947420308903,
'f1': 0.42013888888888884,
'p': 0.2742520398912058,
'r': 0.8976261127596439},
{'a': 0.6411235342241615,
'f1': 0.5517711171662125,
'p': 0.4136874361593463,
'r': 0.8282208588957055},
{'a': 0.9039820166987798,
'f1': 0.8546426835196889,
'p': 0.8108856088560885,
'r': 0.9033915724563206},
{'a': 0.3921737260804128,
'f1': 0.34117921230482406,
'p': 0.20938215102974828,
'r': 0.9207547169811321},
{'a': 0.5535641547861507,
'f1': 0.3234567901234568,
'p': 0.19969512195121952,
'r': 0.8506493506493507},
{'a': 0.14695238095238095,
'f1': 0.10955363356198429,
'p': 0.058325394305070395,
'r': 0.9003267973856209},
{'a': 0.7608596250571559,
'f1': 0.6565988181221274,
'p': 0.5488474204171241,
'r': 0.8169934640522876},
{'a': 0.884125920964501,
'f1': 0.7920673076923077,
'p': 0.7289823008849557,
'r': 0.8671052631578947},
{'a': 0.896735273243435,
'f1': 0.8186915887850468,
'p': 0.7595375722543353,
'r': 0.8878378378378379},
{'a': 0.7920924033762772,
'f1': 0.7240566037735848,
'p': 0.6220871327254306,
'r': 0.8660084626234132},
{'a': 0.36601513240857503,
'f1': 0.25046589638464406,
'p': 0.14608695652173914,
'r': 0.8772845953002611},
{'a': 0.1811268579329416,
'f1': 0.1899469994870918,
'p': 0.10574909575480677,
'r': 0.9320469798657718},
{'a': 0.41000352236703064,
'f1': 0.32595573440643866,
'p': 0.20009881422924902,
'r': 0.8785249457700651},
{'a': 0.7624944714727997,
'f1': 0.6670799752014879,
'p': 0.5563598759048604,
'r': 0.8328173374613003},
{'a': 0.4393613754989254,
'f1': 0.42506297229219153,
'p': 0.27439024390243905,
'r': 0.9427374301675978},
{'a': 0.8097795364612775,
'f1': 0.837321730722746,
'p': 0.7646799116997792,
'r': 0.9252136752136753},
{'a': 0.13969136253931105,
'f1': 0.13577253691866875,
'p': 0.07321131447587355,
'r': 0.9333333333333333},
{'a': 0.6126237623762376,
'f1': 0.44503546099290786,
'p': 0.2977461447212337,
'r': 0.8807017543859649},
{'a': 0.7682926829268293,
'f1': 0.7115384615384617,
'p': 0.6161262050832603,
'r': 0.8419161676646707},
{'a': 0.6949648711943794,
'f1': 0.7540132200188858,
'p': 0.6240719030871434,
'r': 0.9522957662492546},
{'a': 0.4694069657985566,
'f1': 0.4663931839697065,
'p': 0.3124735729386892,
'r': 0.9191542288557214},
{'a': 0.789193302891933,
'f1': 0.7656514382402708,
'p': 0.6830188679245283,
'r': 0.8710298363811357},
{'a': 0.3921901528013582,
'f1': 0.32634408602150533,
'p': 0.19927774130006565,
'r': 0.900593471810089},
{'a': 0.7479967948717948,
'f1': 0.6831234256926951,
'p': 0.5809768637532133,
'r': 0.8288508557457213}],
'entertainment.msn.com;2015': [{'a': 0.5922783603431839,
'f1': 0.30057236304170076,
'p': 0.18274010737721216,
'r': 0.8462246777163904},
{'a': 0.5627964528768818,
'f1': 0.08620689655172414,
'p': 0.045599635202918376,
'r': 0.7874015748031497},
{'a': 0.5792236086353734,
'f1': 0.20978240654640132,
'p': 0.12028150991682661,
'r': 0.8197674418604651},
{'a': 0.5620437956204379,
'f1': 0.08229211546747094,
'p': 0.0435278030993619,
'r': 0.7519685039370079},
{'a': 0.5617125883437468,
'f1': 0.08782775527606056,
'p': 0.046627433227704844,
'r': 0.7545787545787546},
{'a': 0.5577628361858191,
'f1': 0.08821676118462508,
'p': 0.04678102027177545,
'r': 0.7720588235294118},
{'a': 0.49898887765419614,
'f1': 0.17462520821765687,
'p': 0.09741365959423881,
'r': 0.8420348058902276},
{'a': 0.5571502323180175,
'f1': 0.06700021753317381,
'p': 0.03503184713375796,
'r': 0.7661691542288557},
{'a': 0.5541204819277108,
'f1': 0.11345343043311613,
'p': 0.060980634528224144,
'r': 0.8131868131868132},
{'a': 0.5564376590330788,
'f1': 0.07825719120135363,
'p': 0.04120267260579064,
'r': 0.7773109243697479},
{'a': 0.4916342588405535,
'f1': 0.0852725793327909,
'p': 0.04492455418381344,
'r': 0.8370607028753994},
{'a': 0.5661891699685055,
'f1': 0.12139917695473253,
'p': 0.06573083778966132,
'r': 0.793010752688172},
{'a': 0.5645177312009536,
'f1': 0.16622289844047167,
'p': 0.09192259150189314,
'r': 0.8670634920634921},
{'a': 0.5425956576769545,
'f1': 0.11157337367624812,
'p': 0.059995932479153954,
'r': 0.7951482479784366},
{'a': 0.5201313937118723,
'f1': 0.0501579045142114,
'p': 0.025921658986175114,
'r': 0.7714285714285715},
{'a': 0.5346083788706739,
'f1': 0.19119974675530232,
'p': 0.10803076372741907,
'r': 0.8308115543328748},
{'a': 0.5627691984452148,
'f1': 0.05063868613138687,
'p': 0.02616690240452617,
'r': 0.7816901408450704},
{'a': 0.5659126365054602,
'f1': 0.159682899207248,
'p': 0.08790523690773068,
'r': 0.8703703703703703},
{'a': 0.5280909612077203,
'f1': 0.09525554130793186,
'p': 0.05061319836480436,
'r': 0.8074534161490683},
{'a': 0.5116564417177915,
'f1': 0.16935002981514607,
'p': 0.09424257507881201,
'r': 0.8340675477239354},
{'a': 0.5793736501079914,
'f1': 0.3330479452054795,
'p': 0.20717202201313686,
'r': 0.8487272727272728},
{'a': 0.5667684090041969,
'f1': 0.1768756795940558,
'p': 0.09878542510121457,
'r': 0.8442906574394463},
{'a': 0.5547138908793121,
'f1': 0.05884898312418867,
'p': 0.030665163472378805,
'r': 0.7272727272727273},
{'a': 0.5545847039473685,
'f1': 0.05824820691154097,
'p': 0.030330466274332276,
'r': 0.73224043715847},
{'a': 0.5695238095238095,
'f1': 0.1908342284282134,
'p': 0.10769852495453627,
'r': 0.8367346938775511}],
'news.bbc.co.uk;2000': [{'a': 0.9118311981914092,
'f1': 0.8494208494208494,
'p': 0.88,
'r': 0.8208955223880597},
{'a': 0.9057377049180327,
'f1': 0.8410138248847927,
'p': 0.8837772397094431,
'r': 0.8021978021978022},
{'a': 0.9044198895027624,
'f1': 0.9006318207926479,
'p': 0.9223529411764706,
'r': 0.8799102132435466},
{'a': 0.9266358228684732,
'f1': 0.9125295508274233,
'p': 0.9429967426710097,
'r': 0.8839694656488549},
{'a': 0.9117997616209773,
'f1': 0.9022457067371201,
'p': 0.918010752688172,
'r': 0.887012987012987},
{'a': 0.8712029161603888,
'f1': 0.825944170771757,
'p': 0.8152350081037277,
'r': 0.8369384359400999},
{'a': 0.9092284417549168,
'f1': 0.8529411764705883,
'p': 0.8969072164948454,
'r': 0.8130841121495327},
{'a': 0.9022482893450635,
'f1': 0.8511904761904762,
'p': 0.8746177370030581,
'r': 0.8289855072463768},
{'a': 0.8823114869626497,
'f1': 0.7930607187112763,
'p': 0.8142493638676844,
'r': 0.7729468599033816},
{'a': 0.9267782426778243,
'f1': 0.9076517150395779,
'p': 0.9502762430939227,
'r': 0.8686868686868687},
{'a': 0.9225251076040172,
'f1': 0.8689320388349514,
'p': 0.9132653061224489,
'r': 0.8287037037037037},
{'a': 0.8396866840731071,
'f1': 0.8188790560471977,
'p': 0.7797752808988764,
'r': 0.8621118012422361},
{'a': 0.9097978227060654,
'f1': 0.8284023668639052,
'p': 0.8588957055214724,
'r': 0.8},
{'a': 0.8773255813953489,
'f1': 0.8451944240645635,
'p': 0.8458149779735683,
'r': 0.844574780058651},
{'a': 0.9183238636363636,
'f1': 0.8762109795479011,
'p': 0.9187358916478555,
'r': 0.8374485596707819},
{'a': 0.9077791718946048,
'f1': 0.8746803069053709,
'p': 0.9210053859964094,
'r': 0.8327922077922078},
{'a': 0.9012187299550994,
'f1': 0.8555347091932457,
'p': 0.9101796407185628,
'r': 0.8070796460176991},
{'a': 0.8915866741953699,
'f1': 0.8748370273794003,
'p': 0.8946666666666667,
'r': 0.8558673469387755},
{'a': 0.9106292966684294,
'f1': 0.9038133181559476,
'p': 0.928654970760234,
'r': 0.8802660753880266},
{'a': 0.8931178310740354,
'f1': 0.8686739269698911,
'p': 0.9125168236877523,
'r': 0.8288508557457213},
{'a': 0.8817005545286506,
'f1': 0.8502340093603743,
'p': 0.8596214511041009,
'r': 0.8410493827160493},
{'a': 0.9145496535796767,
'f1': 0.8948863636363636,
'p': 0.9402985074626866,
'r': 0.8536585365853658},
{'a': 0.9274905422446406,
'f1': 0.9117421335379894,
'p': 0.9565217391304348,
'r': 0.8709677419354839},
{'a': 0.8952116585704372,
'f1': 0.8422152560083594,
'p': 0.8448637316561844,
'r': 0.8395833333333333},
{'a': 0.9093789607097592,
'f1': 0.8686868686868686,
'p': 0.9148936170212766,
'r': 0.8269230769230769}],
'news.bbc.co.uk;2005': [{'a': 0.8391019644527596,
'f1': 0.7152317880794702,
'p': 0.6189111747851003,
'r': 0.8470588235294118},
{'a': 0.8142076502732241,
'f1': 0.7186761229314422,
'p': 0.6166328600405679,
'r': 0.8611898016997167},
{'a': 0.7044967880085653,
'f1': 0.7008670520231215,
'p': 0.5652680652680653,
'r': 0.9220532319391636},
{'a': 0.8440366972477065,
'f1': 0.8042226487523992,
'p': 0.7325174825174825,
'r': 0.8914893617021277},
{'a': 0.8418952618453865,
'f1': 0.8483978957436633,
'p': 0.8063636363636364,
'r': 0.8950554994954592},
{'a': 0.8062460165710643,
'f1': 0.8033635187580855,
'p': 0.711340206185567,
'r': 0.9227340267459139},
{'a': 0.9006181645268664,
'f1': 0.914238818219122,
'p': 0.9034874290348743,
'r': 0.925249169435216},
{'a': 0.82328190743338,
'f1': 0.7700729927007299,
'p': 0.6963696369636964,
'r': 0.8612244897959184},
{'a': 0.8155339805825242,
'f1': 0.7031250000000001,
'p': 0.6094808126410836,
'r': 0.8307692307692308},
{'a': 0.836343732895457,
'f1': 0.8280621046578494,
'p': 0.7903402854006586,
'r': 0.8695652173913043},
{'a': 0.8426966292134831,
'f1': 0.8038528896672504,
'p': 0.765,
'r': 0.8468634686346863},
{'a': 0.8250950570342205,
'f1': 0.7415730337078651,
'p': 0.6626506024096386,
'r': 0.8418367346938775},
{'a': 0.8407202216066482,
'f1': 0.789762340036563,
'p': 0.7728085867620751,
'r': 0.8074766355140187},
{'a': 0.8260200153964589,
'f1': 0.755939524838013,
'p': 0.6809338521400778,
'r': 0.8495145631067961},
{'a': 0.7973811164713991,
'f1': 0.6931106471816284,
'p': 0.5981981981981982,
'r': 0.8238213399503722},
{'a': 0.8313349320543565,
'f1': 0.7365792759051186,
'p': 0.6526548672566371,
'r': 0.8452722063037249},
{'a': 0.7385358004827032,
'f1': 0.6277205040091639,
'p': 0.4823943661971831,
'r': 0.898360655737705},
{'a': 0.7896613190730838,
'f1': 0.7941860465116279,
'p': 0.6905965621840243,
'r': 0.9343365253077975},
{'a': 0.6781193490054249,
'f1': 0.5180505415162455,
'p': 0.38523489932885907,
'r': 0.790633608815427},
{'a': 0.794679005205321,
'f1': 0.8104644954618259,
'p': 0.711340206185567,
'r': 0.9416873449131513},
{'a': 0.8725854383358098,
'f1': 0.8961550105964273,
'p': 0.8716136631330977,
'r': 0.9221183800623053},
{'a': 0.7995110024449877,
'f1': 0.638235294117647,
'p': 0.5331695331695332,
'r': 0.7948717948717948},
{'a': 0.8473439917483239,
'f1': 0.8435517970401691,
'p': 0.8093306288032455,
'r': 0.8807947019867549},
{'a': 0.709572742022715,
'f1': 0.6546623794212219,
'p': 0.5146612740141557,
'r': 0.8992932862190812},
{'a': 0.8297376093294461,
'f1': 0.8027027027027026,
'p': 0.7342398022249691,
'r': 0.8852459016393442}],
'news.bbc.co.uk;2010': [{'a': 0.7625243981782693,
'f1': 0.7058823529411765,
'p': 0.6311239193083573,
'r': 0.8007312614259597},
{'a': 0.7582283624542869,
'f1': 0.5697758496023138,
'p': 0.4586728754365541,
'r': 0.7519083969465649},
{'a': 0.7143962848297214,
'f1': 0.45089285714285715,
'p': 0.33554817275747506,
'r': 0.6870748299319728},
{'a': 0.706855791962175,
'f1': 0.5267175572519084,
'p': 0.3942857142857143,
'r': 0.7931034482758621},
{'a': 0.7664473684210527,
'f1': 0.32746955345060896,
'p': 0.21530249110320285,
'r': 0.6836158192090396},
{'a': 0.7461730153079388,
'f1': 0.6067291781577496,
'p': 0.5018248175182481,
'r': 0.7670850767085077},
{'a': 0.6400894187779433,
'f1': 0.02424242424242424,
'p': 0.013729977116704805,
'r': 0.10344827586206896},
{'a': 0.7614255765199162,
'f1': 0.4878487848784878,
'p': 0.3702185792349727,
'r': 0.7150395778364116},
{'a': 0.7620689655172413,
'f1': 0.463035019455253,
'p': 0.3380681818181818,
'r': 0.7345679012345679},
{'a': 0.7661676646706587,
'f1': 0.7279693486590039,
'p': 0.6626506024096386,
'r': 0.8075734157650696},
{'a': 0.7575757575757576,
'f1': 0.456,
'p': 0.33727810650887574,
'r': 0.7037037037037037},
{'a': 0.7598484848484849,
'f1': 0.6313953488372093,
'p': 0.5386904761904762,
'r': 0.7626404494382022},
{'a': 0.754180602006689,
'f1': 0.48148148148148145,
'p': 0.3684210526315789,
'r': 0.6946564885496184},
{'a': 0.7579535683576956,
'f1': 0.49141824751580854,
'p': 0.37006802721088433,
'r': 0.7311827956989247},
{'a': 0.7263339070567987,
'f1': 0.29646017699115046,
'p': 0.18457300275482094,
'r': 0.7528089887640449},
{'a': 0.7547416612164813,
'f1': 0.6553308823529411,
'p': 0.5882838283828383,
'r': 0.7396265560165975},
{'a': 0.7636441770519983,
'f1': 0.5454545454545453,
'p': 0.4263565891472868,
'r': 0.7568807339449541},
{'a': 0.6528982992016661,
'f1': 0.4959677419354839,
'p': 0.3614988978692138,
'r': 0.7897271268057785},
{'a': 0.6826923076923077,
'f1': 0.5428424833247819,
'p': 0.41884402216943784,
'r': 0.7711370262390671},
{'a': 0.7665213015766521,
'f1': 0.7015437392795882,
'p': 0.621580547112462,
'r': 0.8051181102362205},
{'a': 0.7759719566602932,
'f1': 0.7356148928168484,
'p': 0.6694045174537988,
'r': 0.8163606010016694},
{'a': 0.7036881810561609,
'f1': 0.4532095901005414,
'p': 0.32447397563676633,
'r': 0.7512820512820513},
{'a': 0.7713230355943587,
'f1': 0.6939325842696629,
'p': 0.628152969894223,
'r': 0.7751004016064257},
{'a': 0.6798048048048048,
'f1': 0.5066512434933488,
'p': 0.3721325403568394,
'r': 0.7934782608695652},
{'a': 0.6916354556803995,
'f1': 0.43650190114068443,
'p': 0.30434782608695654,
'r': 0.771505376344086}],
'news.bbc.co.uk;2015': [{'a': 0.5555871077665767,
'f1': 0.23658536585365852,
'p': 0.14049826187717265,
'r': 0.7484567901234568},
{'a': 0.5497448979591837,
'f1': 0.20236003012804415,
'p': 0.11711711711711711,
'r': 0.7435424354243543},
{'a': 0.563236936825121,
'f1': 0.3763955342902711,
'p': 0.24402573529411764,
'r': 0.8226181254841208},
{'a': 0.5506756756756757,
'f1': 0.14048890137679126,
'p': 0.07836990595611286,
'r': 0.6775067750677507},
{'a': 0.5788987191337647,
'f1': 0.36838978015448604,
'p': 0.23779084633086167,
'r': 0.8172231985940246},
{'a': 0.5673118905545045,
'f1': 0.1927776269345642,
'p': 0.11216429699842022,
'r': 0.6853281853281853},
{'a': 0.5444834855938159,
'f1': 0.19995062947420392,
'p': 0.11571428571428571,
'r': 0.73502722323049},
{'a': 0.5694312474548663,
'f1': 0.2543488481429243,
'p': 0.15510321100917432,
'r': 0.706266318537859},
{'a': 0.5528025381477565,
'f1': 0.11217756448710257,
'p': 0.061131088591042826,
'r': 0.68},
{'a': 0.5627456909585727,
'f1': 0.1770062606715993,
'p': 0.10103963612735542,
'r': 0.713302752293578},
{'a': 0.48321324543921507,
'f1': 0.06335093081411503,
'p': 0.03328467153284671,
'r': 0.6551724137931034},
{'a': 0.5497154836777478,
'f1': 0.08183206106870229,
'p': 0.043762246897452645,
'r': 0.6291079812206573},
{'a': 0.558078141499472,
'f1': 0.2945638432364096,
'p': 0.1813700051894136,
'r': 0.7836322869955157},
{'a': 0.5500292568753657,
'f1': 0.15864332603938733,
'p': 0.08876645240281604,
'r': 0.7455012853470437},
{'a': 0.5471483430521974,
'f1': 0.1939240506329114,
'p': 0.1114019778941245,
'r': 0.748046875},
{'a': 0.5554911619820342,
'f1': 0.1520176893311222,
'p': 0.08550995024875623,
'r': 0.6840796019900498},
{'a': 0.5763792625450513,
'f1': 0.26503126503126506,
'p': 0.1626808385001476,
'r': 0.7146562905317769},
{'a': 0.5585113353426812,
'f1': 0.31202777210537064,
'p': 0.19435258204019334,
'r': 0.7908902691511387},
{'a': 0.5703114281794485,
'f1': 0.30182790905037893,
'p': 0.18701657458563536,
'r': 0.7817551963048499},
{'a': 0.5642479213907785,
'f1': 0.25210810810810813,
'p': 0.1531791907514451,
'r': 0.7118437118437119},
{'a': 0.5640256959314776,
'f1': 0.23726273726273728,
'p': 0.14061574896388396,
'r': 0.7587859424920128},
{'a': 0.5690190257725003,
'f1': 0.2325804901489668,
'p': 0.1399652978600347,
'r': 0.6875},
{'a': 0.5737860137968348,
'f1': 0.3251231527093596,
'p': 0.2042518837459634,
'r': 0.7964323189926548},
{'a': 0.5553221288515406,
'f1': 0.24530544330877105,
'p': 0.1462999716472923,
'r': 0.7588235294117647},
{'a': 0.5427863292460214,
'f1': 0.1616838077015068,
'p': 0.09164859002169197,
'r': 0.6855983772819473}],
'news.yahoo.com;2000': [{'a': 0.9225543478260869,
'f1': 0.9279393173198484,
'p': 0.9607329842931938,
'r': 0.8973105134474327},
{'a': 0.8682457438934122,
'f1': 0.8540983606557376,
'p': 0.837620578778135,
'r': 0.8712374581939799},
{'a': 0.9199632014719411,
'f1': 0.9085173501577286,
'p': 0.9230769230769231,
'r': 0.8944099378881988},
{'a': 0.8974093264248705,
'f1': 0.8626907073509015,
'p': 0.8405405405405405,
'r': 0.886039886039886},
{'a': 0.9158878504672897,
'f1': 0.8979591836734695,
'p': 0.9145496535796767,
'r': 0.8819599109131403},
{'a': 0.9133271202236719,
'f1': 0.8963210702341137,
'p': 0.919908466819222,
'r': 0.8739130434782608},
{'a': 0.8031042128603104,
'f1': 0.8314350797266515,
'p': 0.909468438538206,
'r': 0.7657342657342657},
{'a': 0.8618903754855416,
'f1': 0.8683127572016461,
'p': 0.8591205211726385,
'r': 0.8777038269550749},
{'a': 0.8721804511278195,
'f1': 0.859338061465721,
'p': 0.8644470868014269,
'r': 0.854289071680376},
{'a': 0.877562028047465,
'f1': 0.8540192926045016,
'p': 0.8634590377113134,
'r': 0.8447837150127226},
{'a': 0.8776978417266187,
'f1': 0.8478747203579418,
'p': 0.838495575221239,
'r': 0.8574660633484162},
{'a': 0.920123839009288,
'f1': 0.9315649867374005,
'p': 0.9430719656283566,
'r': 0.9203354297693921},
{'a': 0.8999055712936733,
'f1': 0.8582887700534759,
'p': 0.856,
'r': 0.8605898123324397},
{'a': 0.9074235807860263,
'f1': 0.8960784313725491,
'p': 0.8943248532289628,
'r': 0.8978388998035364},
{'a': 0.8967314069161535,
'f1': 0.9213564213564215,
'p': 0.8948843728100911,
'r': 0.9494423791821561},
{'a': 0.8570649208947081,
'f1': 0.844418052256532,
'p': 0.8535414165666266,
'r': 0.8354876615746181},
{'a': 0.9123867069486404,
'f1': 0.8642745709828393,
'p': 0.9111842105263158,
'r': 0.8219584569732937},
{'a': 0.8925714285714286,
'f1': 0.8164062500000001,
'p': 0.8038461538461539,
'r': 0.8293650793650794},
{'a': 0.9205298013245033,
'f1': 0.9025522041763342,
'p': 0.9131455399061033,
'r': 0.8922018348623854},
{'a': 0.8363959691760522,
'f1': 0.7730263157894737,
'p': 0.7617504051863857,
'r': 0.7846410684474123},
{'a': 0.8599656357388317,
'f1': 0.6433260393873085,
'p': 0.6099585062240664,
'r': 0.6805555555555556},
{'a': 0.90089358245329,
'f1': 0.8842504743833016,
'p': 0.8944337811900192,
'r': 0.874296435272045},
{'a': 0.8973544973544973,
'f1': 0.8422764227642278,
'p': 0.8248407643312102,
'r': 0.8604651162790697},
{'a': 0.9201773835920177,
'f1': 0.924791086350975,
'p': 0.924791086350975,
'r': 0.924791086350975},
{'a': 0.9150214592274678,
'f1': 0.9247148288973385,
'p': 0.8928046989720999,
'r': 0.9589905362776026}],
'news.yahoo.com;2005': [{'a': 0.826288899210404,
'f1': 0.8064182194616977,
'p': 0.7369914853358562,
'r': 0.8902857142857142},
{'a': 0.7051349920592906,
'f1': 0.6634441087613293,
'p': 0.5294117647058824,
'r': 0.8883495145631068},
{'a': 0.8285714285714286,
'f1': 0.827937095282146,
'p': 0.766923736075407,
'r': 0.8994974874371859},
{'a': 0.7835408022130014,
'f1': 0.7016205910390848,
'p': 0.5832012678288431,
'r': 0.8803827751196173},
{'a': 0.7626582278481012,
'f1': 0.5689655172413792,
'p': 0.42950108459869846,
'r': 0.8425531914893617},
{'a': 0.8167247386759582,
'f1': 0.7674624226348364,
'p': 0.6625954198473283,
'r': 0.9117647058823529},
{'a': 0.6875776397515528,
'f1': 0.5755274261603376,
'p': 0.4289308176100629,
'r': 0.8743589743589744},
{'a': 0.7848872638634978,
'f1': 0.7252918287937743,
'p': 0.6044098573281452,
'r': 0.9066147859922179},
{'a': 0.915273132664437,
'f1': 0.8756137479541735,
'p': 0.8784893267651889,
'r': 0.8727569331158238},
{'a': 0.7367066895368782,
'f1': 0.7725925925925926,
'p': 0.6530995616781465,
'r': 0.9456029011786038},
{'a': 0.7989203778677463,
'f1': 0.6179487179487179,
'p': 0.5205183585313174,
'r': 0.7602523659305994},
{'a': 0.6863844977052524,
'f1': 0.7047527604416708,
'p': 0.5707620528771384,
'r': 0.9209535759096612},
{'a': 0.9134049186006234,
'f1': 0.9062265566391597,
'p': 0.94375,
'r': 0.8715728715728716},
{'a': 0.8586535072259429,
'f1': 0.8875175315568024,
'p': 0.8401486988847584,
'r': 0.9405469678953626},
{'a': 0.8676420551207894,
'f1': 0.8979805927091529,
'p': 0.852165256346441,
'r': 0.9490022172949002},
{'a': 0.7374233128834355,
'f1': 0.7720170454545455,
'p': 0.6516786570743405,
'r': 0.9468641114982579},
{'a': 0.7749169435215947,
'f1': 0.47984644913627633,
'p': 0.36231884057971014,
'r': 0.7102272727272727},
{'a': 0.7755662319835278,
'f1': 0.6765578635014837,
'p': 0.5652892561983471,
'r': 0.8423645320197044},
{'a': 0.8507462686567164,
'f1': 0.8557692307692308,
'p': 0.7837573385518591,
'r': 0.9423529411764706},
{'a': 0.8466257668711656,
'f1': 0.8619957537154989,
'p': 0.79296875,
'r': 0.9441860465116279},
{'a': 0.6668734491315137,
'f1': 0.5095890410958904,
'p': 0.3661417322834646,
'r': 0.8378378378378378},
{'a': 0.8207322872087494,
'f1': 0.7851851851851852,
'p': 0.7298728813559322,
'r': 0.8495684340320592},
{'a': 0.8586556169429097,
'f1': 0.8662309368191722,
'p': 0.8174342105263158,
'r': 0.9212233549582948},
{'a': 0.6675409836065573,
'f1': 0.5053658536585366,
'p': 0.36022253129346316,
'r': 0.8464052287581699},
{'a': 0.8410117434507678,
'f1': 0.8462882096069869,
'p': 0.7795655671761866,
'r': 0.9255014326647565}],
'news.yahoo.com;2010': [{'a': 0.8125,
'f1': 0.7473982970671712,
'p': 0.6954225352112676,
'r': 0.8077709611451943},
{'a': 0.792352371732817,
'f1': 0.49230769230769234,
'p': 0.37749546279491836,
'r': 0.7074829931972789},
{'a': 0.7949526813880127,
'f1': 0.6627756160830091,
'p': 0.5774011299435028,
'r': 0.7777777777777778},
{'a': 0.7983315197678637,
'f1': 0.7148717948717948,
'p': 0.6606635071090048,
'r': 0.7787709497206704},
{'a': 0.8179453836150845,
'f1': 0.7824397824397824,
'p': 0.7437223042836041,
'r': 0.8254098360655737},
{'a': 0.801693404634581,
'f1': 0.6120313862249346,
'p': 0.5043103448275862,
'r': 0.7782705099778271},
{'a': 0.8084656084656084,
'f1': 0.7454289732770745,
'p': 0.6824034334763949,
'r': 0.8212809917355371},
{'a': 0.7891472868217054,
'f1': 0.6472114137483787,
'p': 0.558165548098434,
'r': 0.7700617283950617},
{'a': 0.8128453038674033,
'f1': 0.7665805340223945,
'p': 0.7212317666126418,
'r': 0.8180147058823529},
{'a': 0.7882805816937554,
'f1': 0.5607808340727595,
'p': 0.4520743919885551,
'r': 0.7383177570093458},
{'a': 0.7756706753006476,
'f1': 0.41495778045838355,
'p': 0.29965156794425085,
'r': 0.6745098039215687},
{'a': 0.7818003913894325,
'f1': 0.4677804295942721,
'p': 0.3391003460207612,
'r': 0.7538461538461538},
{'a': 0.8298582151793161,
'f1': 0.8300943920044419,
'p': 0.806799784133837,
'r': 0.8547741566609491},
{'a': 0.7997992975413949,
'f1': 0.4017991004497751,
'p': 0.2809224318658281,
'r': 0.7052631578947368},
{'a': 0.8048540505083634,
'f1': 0.7550432276657061,
'p': 0.7005347593582888,
'r': 0.81875},
{'a': 0.8002373417721519,
'f1': 0.6918852959121414,
'p': 0.6203501094091903,
'r': 0.7820689655172414},
{'a': 0.7970494417862839,
'f1': 0.6688353936239428,
'p': 0.5881006864988558,
'r': 0.77526395173454},
{'a': 0.7986111111111112,
'f1': 0.6393562545720556,
'p': 0.5588235294117647,
'r': 0.747008547008547},
{'a': 0.799609375,
'f1': 0.673871582962492,
'p': 0.6057142857142858,
'r': 0.7593123209169055},
{'a': 0.7792207792207793,
'f1': 0.6678507992895204,
'p': 0.5784615384615385,
'r': 0.7899159663865546},
{'a': 0.8131220051603391,
'f1': 0.7050610820244329,
'p': 0.637223974763407,
'r': 0.7890625},
{'a': 0.7875927174645988,
'f1': 0.7403132728771641,
'p': 0.6834094368340944,
'r': 0.8075539568345323},
{'a': 0.8000719165767709,
'f1': 0.728780487804878,
'p': 0.6772438803263826,
'r': 0.7888067581837381},
{'a': 0.8107739515854074,
'f1': 0.754750331418471,
'p': 0.7,
'r': 0.8187919463087249},
{'a': 0.8030973451327433,
'f1': 0.5250800426894343,
'p': 0.41765704584040747,
'r': 0.7068965517241379}],
'news.yahoo.com;2015': [{'a': 0.3048423700544117,
'f1': 0.035215543412264724,
'p': 0.018075574600701208,
'r': 0.6803519061583577},
{'a': 0.34525586353944565,
'f1': 0.025003968883949835,
'p': 0.012745296378717378,
'r': 0.6548856548856549},
{'a': 0.2560697667057073,
'f1': 0.01878796735068785,
'p': 0.009526228883525974,
'r': 0.6766917293233082},
{'a': 0.3435495898583147,
'f1': 0.04774095842498107,
'p': 0.024713480419606526,
'r': 0.6997885835095138},
{'a': 0.38345512460183623,
'f1': 0.02623921085080148,
'p': 0.013366162504396765,
'r': 0.7112299465240641},
{'a': 0.3304576046566016,
'f1': 0.06543344214726152,
'p': 0.03422760217053087,
'r': 0.7411668036154478},
{'a': 0.3483621870718645,
'f1': 0.052654450640979206,
'p': 0.027355508729680914,
'r': 0.7003853564547207},
{'a': 0.3494353008685673,
'f1': 0.056013927787449846,
'p': 0.029069767441860465,
'r': 0.7660455486542443},
{'a': 0.2613521237506237,
'f1': 0.03416572750459695,
'p': 0.017480678185570347,
'r': 0.7506516072980017},
{'a': 0.38774996063612027,
'f1': 0.028579994004197064,
'p': 0.014579934747145187,
'r': 0.7185929648241206},
{'a': 0.2916898903840539,
'f1': 0.008692099104788083,
'p': 0.004374976629398347,
'r': 0.6573033707865169},
{'a': 0.34022892717958775,
'f1': 0.036302448804238864,
'p': 0.01866166077738516,
'r': 0.6636125654450262},
{'a': 0.3620855236554792,
'f1': 0.04883747220861439,
'p': 0.02526020348497252,
'r': 0.7330316742081447},
{'a': 0.33174694993689524,
'f1': 0.04314596588983848,
'p': 0.022292250233426705,
'r': 0.6686114352392065},
{'a': 0.24957875777119284,
'f1': 0.029311187103077677,
'p': 0.014970059880239521,
'r': 0.6976744186046512},
{'a': 0.3363527076518773,
'f1': 0.022886309376800855,
'p': 0.011643979057591623,
'r': 0.6634844868735084},
{'a': 0.3411867364746946,
'f1': 0.03392680875955105,
'p': 0.01741349545898071,
'r': 0.6562942008486563},
{'a': 0.2571102978941962,
'f1': 0.013344418153524759,
'p': 0.00674612582710089,
'r': 0.6089494163424124},
{'a': 0.3310363836824697,
'f1': 0.03394246426632894,
'p': 0.017437537180249853,
'r': 0.6346414073071719},
{'a': 0.34860527514807876,
'f1': 0.04199240562876927,
'p': 0.02163157289149695,
'r': 0.714828897338403},
{'a': 0.33099696356275304,
'f1': 0.01856045139017781,
'p': 0.009425426029256523,
'r': 0.6024096385542169},
{'a': 0.2689490523443717,
'f1': 0.024784973026843165,
'p': 0.012613009922822492,
'r': 0.7087980173482032},
{'a': 0.34358827597720065,
'f1': 0.07051195461299474,
'p': 0.03700552956188856,
'r': 0.7457142857142857},
{'a': 0.34750822755054067,
'f1': 0.06406581919951444,
'p': 0.03352152434721242,
'r': 0.721336370539104},
{'a': 0.25777743020254945,
'f1': 0.010921672433198549,
'p': 0.005512423993772223,
'r': 0.5835189309576837}],
'thenation.com;2000': [{'a': 0.948016415868673,
'f1': 0.963035019455253,
'p': 0.9611650485436893,
'r': 0.9649122807017544},
{'a': 0.900839054157132,
'f1': 0.9214975845410629,
'p': 0.8965922444183314,
'r': 0.9478260869565217},
{'a': 0.8160337552742616,
'f1': 0.844950213371266,
'p': 0.75,
'r': 0.9674267100977199},
{'a': 0.9159420289855073,
'f1': 0.9504950495049507,
'p': 0.9200264375413086,
'r': 0.9830508474576272},
{'a': 0.7521212121212121,
'f1': 0.8069844266163284,
'p': 0.6939935064935064,
'r': 0.963923337091319},
{'a': 0.8325673013788575,
'f1': 0.8754274548119199,
'p': 0.7950310559006211,
'r': 0.9739130434782609},
{'a': 0.9291457286432161,
'f1': 0.9593190998268898,
'p': 0.9308510638297872,
'r': 0.9895833333333334},
{'a': 0.9315551082033215,
'f1': 0.9603960396039604,
'p': 0.9371980676328503,
'r': 0.9847715736040609},
{'a': 0.8867091711623345,
'f1': 0.925459825750242,
'p': 0.8749237339841367,
'r': 0.9821917808219178},
{'a': 0.9161966156325544,
'f1': 0.9312169312169312,
'p': 0.9130998702983139,
'r': 0.9500674763832658},
{'a': 0.9013710747456878,
'f1': 0.9372007885102787,
'p': 0.8946236559139785,
'r': 0.984033116499113},
{'a': 0.7989382879893829,
'f1': 0.8438948995363215,
'p': 0.7465815861440291,
'r': 0.9703791469194313},
{'a': 0.92643391521197,
'f1': 0.8747346072186836,
'p': 0.8841201716738197,
'r': 0.865546218487395},
{'a': 0.9326113116726835,
'f1': 0.8828451882845187,
'p': 0.8865546218487395,
'r': 0.8791666666666667},
{'a': 0.929305912596401,
'f1': 0.9499089253187614,
'p': 0.924645390070922,
'r': 0.9765917602996255},
{'a': 0.8337819650067295,
'f1': 0.8753154972236243,
'p': 0.7903372835004557,
'r': 0.9807692307692307},
{'a': 0.9232209737827716,
'f1': 0.9459815546772069,
'p': 0.9220890410958904,
'r': 0.9711451758340848},
{'a': 0.9671549045716822,
'f1': 0.9795353982300885,
'p': 0.9838888888888889,
'r': 0.9752202643171806},
{'a': 0.8517538054268696,
'f1': 0.8909444985394352,
'p': 0.8198924731182796,
'r': 0.9754797441364605},
{'a': 0.9136400322841001,
'f1': 0.9288090485695276,
'p': 0.9148099606815203,
'r': 0.9432432432432433},
{'a': 0.949293246578416,
'f1': 0.9712248535777948,
'p': 0.9556502129792032,
'r': 0.9873155578565881},
{'a': 0.9605055292259084,
'f1': 0.9787835926449787,
'p': 0.9651324965132496,
'r': 0.9928263988522238},
{'a': 0.9318840579710145,
'f1': 0.9566020313942751,
'p': 0.9316546762589928,
'r': 0.9829222011385199},
{'a': 0.847394540942928,
'f1': 0.8894878706199462,
'p': 0.8256880733944955,
'r': 0.9639727361246349},
{'a': 0.8889570552147239,
'f1': 0.9224174882126017,
'p': 0.8762214983713354,
'r': 0.9737556561085973}],
'thenation.com;2005': [{'a': 0.7414854329093147,
'f1': 0.8374613003095975,
'p': 0.7300944669365722,
'r': 0.9818511796733213},
{'a': 0.5137777777777778,
'f1': 0.6188153310104529,
'p': 0.4563206577595067,
'r': 0.961038961038961},
{'a': 0.7658473479948253,
'f1': 0.852725793327909,
'p': 0.7561327561327561,
'r': 0.9776119402985075},
{'a': 0.849610270518111,
'f1': 0.9101861993428259,
'p': 0.8483920367534457,
'r': 0.9816893089190786},
{'a': 0.5731292517006803,
'f1': 0.6714659685863875,
'p': 0.5202839756592292,
'r': 0.9464944649446494},
{'a': 0.9057798891528107,
'f1': 0.9227774172615184,
'p': 0.8876404494382022,
'r': 0.9608108108108108},
{'a': 0.7099871959026889,
'f1': 0.8026143790849674,
'p': 0.6842496285289748,
'r': 0.9704952581664911},
{'a': 0.8706038487060385,
'f1': 0.8898927159796725,
'p': 0.8668866886688669,
'r': 0.91415313225058},
{'a': 0.7269180754226268,
'f1': 0.8161120840630474,
'p': 0.7039274924471299,
'r': 0.9708333333333333},
{'a': 0.9004950495049505,
'f1': 0.9133247089262614,
'p': 0.9168831168831169,
'r': 0.9097938144329897},
{'a': 0.826677994902294,
'f1': 0.8794326241134752,
'p': 0.8275862068965517,
'r': 0.9382093316519546},
{'a': 0.6467889908256881,
'f1': 0.7636224098234843,
'p': 0.6269691241335854,
'r': 0.9764474975466143},
{'a': 0.8954685890834192,
'f1': 0.913946587537092,
'p': 0.9120135363790186,
'r': 0.9158878504672897},
{'a': 0.7067342505430847,
'f1': 0.8055688910225637,
'p': 0.6894001643385373,
'r': 0.9688221709006929},
{'a': 0.7941558441558442,
'f1': 0.8731492597038816,
'p': 0.7905797101449276,
'r': 0.9749776586237712},
{'a': 0.9431714023831348,
'f1': 0.9671610169491526,
'p': 0.9620653319283456,
'r': 0.972310969116081},
{'a': 0.8959881129271917,
'f1': 0.9042407660738714,
'p': 0.8789893617021277,
'r': 0.9309859154929577},
{'a': 0.9060481503229595,
'f1': 0.9272727272727272,
'p': 0.9082813891362422,
'r': 0.947075208913649},
{'a': 0.8342046303211351,
'f1': 0.8497970230040597,
'p': 0.7733990147783252,
'r': 0.9429429429429429},
{'a': 0.8912901113294041,
'f1': 0.9011904761904763,
'p': 0.8822843822843823,
'r': 0.9209245742092458},
{'a': 0.842873831775701,
'f1': 0.8663686040735221,
'p': 0.8126747437092264,
'r': 0.9276595744680851},
{'a': 0.905373831775701,
'f1': 0.9209756097560975,
'p': 0.9129593810444874,
'r': 0.9291338582677166},
{'a': 0.865615141955836,
'f1': 0.8735905044510386,
'p': 0.8382687927107062,
'r': 0.9120198265179678},
{'a': 0.8798283261802575,
'f1': 0.89937106918239,
'p': 0.8674176776429809,
'r': 0.933768656716418},
{'a': 0.7283018867924528,
'f1': 0.8226600985221675,
'p': 0.712457337883959,
'r': 0.9731934731934732}],
'thenation.com;2010': [{'a': 0.675764192139738,
'f1': 0.6285178236397749,
'p': 0.489766081871345,
'r': 0.8769633507853403},
{'a': 0.6671180931744312,
'f1': 0.6212634822804315,
'p': 0.4818355640535373,
'r': 0.8742411101474414},
{'a': 0.6322725012431626,
'f1': 0.586756077116513,
'p': 0.44043624161073824,
'r': 0.8786610878661087},
{'a': 0.7086073777523592,
'f1': 0.647282796815507,
'p': 0.5114879649890591,
'r': 0.88124410933082},
{'a': 0.7182883341823739,
'f1': 0.7089473684210525,
'p': 0.5836221837088388,
'r': 0.9028150134048257},
{'a': 0.8147023086269745,
'f1': 0.45045045045045046,
'p': 0.3246753246753247,
'r': 0.7352941176470589},
{'a': 0.657844387755102,
'f1': 0.49695264885138307,
'p': 0.3559435862995299,
'r': 0.8229813664596274},
{'a': 0.4285228624851266,
'f1': 0.40663607483233327,
'p': 0.2612244897959184,
'r': 0.9171974522292994},
{'a': 0.8819702602230484,
'f1': 0.9008973858759267,
'p': 0.8726379440665155,
'r': 0.9310483870967742},
{'a': 0.8352638352638353,
'f1': 0.787551867219917,
'p': 0.7684210526315789,
'r': 0.8076595744680851},
{'a': 0.7230172927847347,
'f1': 0.6279535442531037,
'p': 0.5275908479138627,
'r': 0.7754698318496538},
{'a': 0.6940684223480187,
'f1': 0.6834733893557422,
'p': 0.5502255022550225,
'r': 0.9018817204301075},
{'a': 0.6265653869841922,
'f1': 0.6345187864175206,
'p': 0.48584615384615387,
'r': 0.9143022582513028},
{'a': 0.6097623966942148,
'f1': 0.5624094989863887,
'p': 0.4090143218197136,
'r': 0.8999073215940686},
{'a': 0.7384384384384385,
'f1': 0.6553225168183617,
'p': 0.553475935828877,
'r': 0.8031037827352085},
{'a': 0.5977851083883129,
'f1': 0.5124250214224507,
'p': 0.3676229508196721,
'r': 0.8454288407163054},
{'a': 0.7416363034117257,
'f1': 0.6842105263157894,
'p': 0.5588624338624338,
'r': 0.8820459290187892},
{'a': 0.7345368452204795,
'f1': 0.712592117910926,
'p': 0.5868073878627968,
'r': 0.9070146818923328},
{'a': 0.36462324393358875,
'f1': 0.40454817474566124,
'p': 0.2586404795306721,
'r': 0.9281464530892448},
{'a': 0.5604063701263042,
'f1': 0.4226469527587451,
'p': 0.28309178743961355,
'r': 0.833570412517781},
{'a': 0.5278008298755187,
'f1': 0.4557627929220469,
'p': 0.31123448726322667,
'r': 0.8508928571428571},
{'a': 0.8262844166903207,
'f1': 0.80875,
'p': 0.7398513436249285,
'r': 0.8917987594762233},
{'a': 0.5724090597117364,
'f1': 0.5379480840543882,
'p': 0.3837742504409171,
'r': 0.8991735537190083},
{'a': 0.7998363785110445,
'f1': 0.78475073313783,
'p': 0.6904024767801857,
'r': 0.9089673913043478},
{'a': 0.7134107027724049,
'f1': 0.6189455636519503,
'p': 0.4935064935064935,
'r': 0.8298850574712644}],
'thenation.com;2015': [{'a': 0.7011661807580175,
'f1': 0.7466007416563658,
'p': 0.631578947368421,
'r': 0.9128463476070529},
{'a': 0.6158984635938544,
'f1': 0.5607333842627961,
'p': 0.42305475504322765,
'r': 0.8312570781426953},
{'a': 0.6486733760292772,
'f1': 0.6437847866419295,
'p': 0.5090464547677261,
'r': 0.8755256518082423},
{'a': 0.7768453502312039,
'f1': 0.83955177933752,
'p': 0.7604282846308276,
'r': 0.9370533260032985},
{'a': 0.6675358539765319,
'f1': 0.6966452533904354,
'p': 0.567222006974041,
'r': 0.9025893958076449},
{'a': 0.6472923164162178,
'f1': 0.6589912280701754,
'p': 0.5306843267108168,
'r': 0.8691250903832248},
{'a': 0.6458094144661309,
'f1': 0.6523943661971832,
'p': 0.5220919747520288,
'r': 0.8693693693693694},
{'a': 0.543138866064092,
'f1': 0.3110285006195787,
'p': 0.19670846394984326,
'r': 0.742603550295858},
{'a': 0.6071055381400209,
'f1': 0.5534441805225654,
'p': 0.40853302162478083,
'r': 0.8576687116564418},
{'a': 0.6504384638645297,
'f1': 0.6545128511655709,
'p': 0.5179754020813624,
'r': 0.8887987012987013},
{'a': 0.5240253853127833,
'f1': 0.14634146341463414,
'p': 0.08272058823529412,
'r': 0.6338028169014085},
{'a': 0.6274137385248496,
'f1': 0.5923103567717354,
'p': 0.45943041375604515,
'r': 0.8333333333333334},
{'a': 0.650899593731863,
'f1': 0.6559908492993995,
'p': 0.5256645279560037,
'r': 0.8722433460076046},
{'a': 0.6512681159420289,
'f1': 0.6526315789473685,
'p': 0.514218009478673,
'r': 0.8930041152263375},
{'a': 0.6519756838905775,
'f1': 0.6560528687293481,
'p': 0.5182724252491694,
'r': 0.8936170212765957},
{'a': 0.5417523652817771,
'f1': 0.2967171717171717,
'p': 0.18905872888173772,
'r': 0.6891495601173021},
{'a': 0.7307525010874293,
'f1': 0.7867723045125732,
'p': 0.6813842482100239,
'r': 0.9307253463732681},
{'a': 0.7786984031334739,
'f1': 0.8467716699697507,
'p': 0.7636876763875823,
'r': 0.9501404494382022},
{'a': 0.7297186280550421,
'f1': 0.7874677002583979,
'p': 0.6924169270093723,
'r': 0.9127667540247099},
{'a': 0.7475834397227795,
'f1': 0.8104109589041095,
'p': 0.7160493827160493,
'r': 0.9334174818554749},
{'a': 0.6403210867551713,
'f1': 0.6274384393987849,
'p': 0.49520444220090865,
'r': 0.856020942408377},
{'a': 0.6655328798185941,
'f1': 0.6894736842105262,
'p': 0.561990561990562,
'r': 0.8917631041524847},
{'a': 0.7591199699135013,
'f1': 0.8191444303261329,
'p': 0.7310987903225806,
'r': 0.9313001605136436},
{'a': 0.5442651548190144,
'f1': 0.23218221895664953,
'p': 0.14044444444444446,
'r': 0.6694915254237288},
{'a': 0.612482853223594,
'f1': 0.5592823712948518,
'p': 0.41589327146171695,
'r': 0.8535714285714285}],
'www.cnn.com;2000': [{'a': 0.8130899937067338,
'f1': 0.7341092211280216,
'p': 0.68561872909699,
'r': 0.789980732177264},
{'a': 0.7033918691363964,
'f1': 0.7619231511874879,
'p': 0.6567909454061251,
'r': 0.9071264367816092},
{'a': 0.9567706842255941,
'f1': 0.9742628259757967,
'p': 0.9855172413793103,
'r': 0.963262554769127},
{'a': 0.7296494355317885,
'f1': 0.6033129904097646,
'p': 0.4798890429958391,
'r': 0.812206572769953},
{'a': 0.8298865910607072,
'f1': 0.7038327526132403,
'p': 0.62217659137577,
'r': 0.8101604278074866},
{'a': 0.8375254928619986,
'f1': 0.8686813186813187,
'p': 0.8187467633350596,
'r': 0.92510239906378},
{'a': 0.7469262295081968,
'f1': 0.77255985267035,
'p': 0.6549570647931303,
'r': 0.941638608305275},
{'a': 0.812,
'f1': 0.8061056105610561,
'p': 0.8196308724832215,
'r': 0.7930194805194806},
{'a': 0.7922141119221411,
'f1': 0.7634349030470916,
'p': 0.7638580931263859,
'r': 0.7630121816168328},
{'a': 0.8499701135684399,
'f1': 0.8163862472567666,
'p': 0.7994269340974212,
'r': 0.8340807174887892},
{'a': 0.7581291759465479,
'f1': 0.7517146776406034,
'p': 0.648776637726914,
'r': 0.8934782608695652},
{'a': 0.8777838131450298,
'f1': 0.8659916617033949,
'p': 0.8453488372093023,
'r': 0.8876678876678876},
{'a': 0.758496395468589,
'f1': 0.7159297395517868,
'p': 0.606776180698152,
'r': 0.8729689807976366},
{'a': 0.8103021297672115,
'f1': 0.8004168837936425,
'p': 0.7427466150870407,
'r': 0.8677966101694915},
{'a': 0.877246653919694,
'f1': 0.896551724137931,
'p': 0.8798228969006958,
'r': 0.9139290407358739},
{'a': 0.7289398280802293,
'f1': 0.6266771902131018,
'p': 0.5,
'r': 0.8393234672304439},
{'a': 0.7263533610945866,
'f1': 0.727810650887574,
'p': 0.5896452540747843,
'r': 0.9505409582689336},
{'a': 0.7041499330655957,
'f1': 0.44191919191919193,
'p': 0.30594405594405594,
'r': 0.7954545454545454},
{'a': 0.8116094986807388,
'f1': 0.7698259187620888,
'p': 0.766367137355584,
'r': 0.7733160621761658},
{'a': 0.8488805970149254,
'f1': 0.7996702390766693,
'p': 0.7601880877742947,
'r': 0.8434782608695652},
{'a': 0.8317631224764468,
'f1': 0.7093023255813954,
'p': 0.613682092555332,
'r': 0.8402203856749312},
{'a': 0.8600891861761427,
'f1': 0.8767795778105055,
'p': 0.8703703703703703,
'r': 0.8832838773491593},
{'a': 0.8866200967221923,
'f1': 0.9090909090909092,
'p': 0.9009393680614859,
'r': 0.9173913043478261},
{'a': 0.7409985597695631,
'f1': 0.7998516045260621,
'p': 0.7085113374958922,
'r': 0.9182282793867121},
{'a': 0.7255568138920347,
'f1': 0.7238890998860615,
'p': 0.6192332683560754,
'r': 0.8711151736745887}],
'www.cnn.com;2005': [{'a': 0.7973986993496749,
'f1': 0.6505608283002589,
'p': 0.5568685376661743,
'r': 0.7821576763485477},
{'a': 0.7814922480620154,
'f1': 0.6538756715272448,
'p': 0.534504391468005,
'r': 0.841897233201581},
{'a': 0.8120333772507685,
'f1': 0.7855711422845693,
'p': 0.7101449275362319,
'r': 0.8789237668161435},
{'a': 0.7939339875111507,
'f1': 0.7072243346007605,
'p': 0.6421173762945915,
'r': 0.7870239774330042},
{'a': 0.7925133689839572,
'f1': 0.6040816326530613,
'p': 0.4860426929392447,
'r': 0.7978436657681941},
{'a': 0.8149480415667466,
'f1': 0.7729279058361942,
'p': 0.701067615658363,
'r': 0.8612021857923498},
{'a': 0.7992213570634038,
'f1': 0.6518804243008679,
'p': 0.5425361155698234,
'r': 0.8164251207729468},
{'a': 0.7974481658692185,
'f1': 0.6186186186186187,
'p': 0.5132890365448505,
'r': 0.7783375314861462},
{'a': 0.8134087237479806,
'f1': 0.8023952095808382,
'p': 0.7397476340694006,
'r': 0.8766355140186916},
{'a': 0.8281767955801105,
'f1': 0.5576102418207681,
'p': 0.45794392523364486,
'r': 0.7127272727272728},
{'a': 0.8291413703382481,
'f1': 0.7895299145299146,
'p': 0.7147001934235977,
'r': 0.8818615751789977},
{'a': 0.8012170385395537,
'f1': 0.6512455516014235,
'p': 0.5414201183431953,
'r': 0.8169642857142857},
{'a': 0.7987890079180252,
'f1': 0.6940509915014165,
'p': 0.6041923551171393,
'r': 0.8153078202995009},
{'a': 0.8547993019197208,
'f1': 0.8256496227996648,
'p': 0.8047385620915033,
'r': 0.8476764199655766},
{'a': 0.8202293202293203,
'f1': 0.7898516036381045,
'p': 0.7313829787234043,
'r': 0.858480749219563},
{'a': 0.7980817768803634,
'f1': 0.6563573883161512,
'p': 0.5568513119533528,
'r': 0.799163179916318},
{'a': 0.8632313056954669,
'f1': 0.8067870826491517,
'p': 0.7543500511770727,
'r': 0.8670588235294118},
{'a': 0.7817047817047817,
'f1': 0.7172859450726979,
'p': 0.6195348837209302,
'r': 0.8516624040920716},
{'a': 0.7955215085444903,
'f1': 0.6320254506892895,
'p': 0.5173611111111112,
'r': 0.8119891008174387},
{'a': 0.7862723214285714,
'f1': 0.5379975874547648,
'p': 0.41838649155722324,
'r': 0.7533783783783784},
{'a': 0.8092676872155565,
'f1': 0.7634684453565932,
'p': 0.6914498141263941,
'r': 0.852233676975945},
{'a': 0.8084622383985441,
'f1': 0.7710712343665034,
'p': 0.6910331384015594,
'r': 0.8720787207872078},
{'a': 0.819971870604782,
'f1': 0.7408906882591093,
'p': 0.6428571428571429,
'r': 0.8742038216560509},
{'a': 0.8246376811594203,
'f1': 0.6657458563535911,
'p': 0.5751789976133651,
'r': 0.7901639344262295},
{'a': 0.8191964285714286,
'f1': 0.8329896907216495,
'p': 0.7816473189607518,
'r': 0.8915510718789408}],
'www.cnn.com;2010': [{'a': 0.7275031685678074,
'f1': 0.7248880358285349,
'p': 0.6343784994400896,
'r': 0.8455223880597015},
{'a': 0.6324081020255063,
'f1': 0.5346628679962013,
'p': 0.39900779588944013,
'r': 0.8100719424460432},
{'a': 0.7277505255781359,
'f1': 0.7082238077356365,
'p': 0.6224422442244224,
'r': 0.8214285714285714},
{'a': 0.6537997587454765,
'f1': 0.702127659574468,
'p': 0.5600165562913907,
'r': 0.9408901251738526},
{'a': 0.5586563307493541,
'f1': 0.14600000000000002,
'p': 0.09193954659949623,
'r': 0.35436893203883496},
{'a': 0.6614678899082569,
'f1': 0.4728571428571429,
'p': 0.338100102145046,
'r': 0.7862232779097387},
{'a': 0.45656706045865186,
'f1': 0.3919129082426127,
'p': 0.25237856785177765,
'r': 0.8765217391304347},
{'a': 0.6976923076923077,
'f1': 0.6330532212885154,
'p': 0.5191424196018377,
'r': 0.8110047846889952},
{'a': 0.6745749308026888,
'f1': 0.6304445442299056,
'p': 0.5254491017964071,
'r': 0.7878787878787878},
{'a': 0.7147385103011094,
'f1': 0.7341996455995274,
'p': 0.646049896049896,
'r': 0.8502051983584131},
{'a': 0.6349760139555168,
'f1': 0.39303843364757074,
'p': 0.26965174129353237,
'r': 0.7245989304812834},
{'a': 0.6150234741784038,
'f1': 0.5858585858585859,
'p': 0.4628307433851323,
'r': 0.7979724837074583},
{'a': 0.6288178224937119,
'f1': 0.5872952457051538,
'p': 0.4596622889305816,
'r': 0.8130530973451328},
{'a': 0.660952380952381,
'f1': 0.6959863364645602,
'p': 0.5705285264263213,
'r': 0.8921729611384783},
{'a': 0.21855983772819473,
'f1': 0.10666666666666667,
'p': 0.05723172628304821,
'r': 0.7829787234042553},
{'a': 0.6026184058529072,
'f1': 0.45454545454545453,
'p': 0.3225806451612903,
'r': 0.7692307692307693},
{'a': 0.6807069219440354,
'f1': 0.7090713902308106,
'p': 0.6051305542830967,
'r': 0.8561244329228775},
{'a': 0.605606258148631,
'f1': 0.5437405731523378,
'p': 0.41460609545715926,
'r': 0.7897042716319824},
{'a': 0.7437995397596523,
'f1': 0.7909015025041735,
'p': 0.7159047978843974,
'r': 0.8834498834498834},
{'a': 0.5363106014886341,
'f1': 0.41541973116916053,
'p': 0.28477051460361613,
'r': 0.767572633552015},
{'a': 0.11181766218919692,
'f1': 0.0687691961944715,
'p': 0.03584817244611059,
'r': 0.8422018348623853},
{'a': 0.6964824120603015,
'f1': 0.702950819672131,
'p': 0.6025857223159078,
'r': 0.8434303697875688},
{'a': 0.6620408163265306,
'f1': 0.5460526315789473,
'p': 0.4188393608074012,
'r': 0.784251968503937},
{'a': 0.7014111610006415,
'f1': 0.681491618200479,
'p': 0.5817757009345794,
'r': 0.8224607762180016},
{'a': 0.6079678607298292,
'f1': 0.5402434236356498,
'p': 0.4105011933174224,
'r': 0.7898966704936854}],
'www.cnn.com;2015': [{'a': 0.5461303017052908,
'f1': 0.6239130434782609,
'p': 0.47385800770500824,
'r': 0.9130434782608695},
{'a': 0.30808337569903405,
'f1': 0.3601316408086506,
'p': 0.22635933806146571,
'r': 0.8804597701149425},
{'a': 0.3871693866066404,
'f1': 0.33475870494807575,
'p': 0.21076923076923076,
'r': 0.8130563798219584},
{'a': 0.237528699645168,
'f1': 0.29844440176685233,
'p': 0.1796116504854369,
'r': 0.8819523269012486},
{'a': 0.28893905191873587,
'f1': 0.34402332361516036,
'p': 0.21354705274043434,
'r': 0.8843683083511777},
{'a': 0.487090367428004,
'f1': 0.5143394452280208,
'p': 0.3690958164642375,
'r': 0.8480620155038759},
{'a': 0.44073455759599334,
'f1': 0.4450579790171176,
'p': 0.30142109199700823,
'r': 0.8502109704641351},
{'a': 0.5386666666666666,
'f1': 0.5942142298670837,
'p': 0.4439252336448598,
'r': 0.8983451536643026},
{'a': 0.21576673866090712,
'f1': 0.2719069580910367,
'p': 0.1609304533586518,
'r': 0.875968992248062},
{'a': 0.10892214434551999,
'f1': 0.07365104371799922,
'p': 0.038901601830663615,
'r': 0.6900369003690037},
{'a': 0.30060493252675663,
'f1': 0.3346613545816733,
'p': 0.2074643249176729,
'r': 0.8649885583524027},
{'a': 0.2972493345164153,
'f1': 0.21739130434782608,
'p': 0.12746234067207415,
'r': 0.738255033557047},
{'a': 0.5377104377104377,
'f1': 0.551453773276707,
'p': 0.402479732951836,
'r': 0.8755186721991701},
{'a': 0.6343705799151343,
'f1': 0.731009365244537,
'p': 0.5955913522679102,
'r': 0.9461279461279462},
{'a': 0.4623908663532572,
'f1': 0.5923096511331806,
'p': 0.4293097083794758,
'r': 0.9548440065681445},
{'a': 0.5055798156234838,
'f1': 0.5628485628485629,
'p': 0.40923268870867124,
'r': 0.9010989010989011},
{'a': 0.20300230946882217,
'f1': 0.22848200312989042,
'p': 0.13204134366925063,
'r': 0.8474295190713101},
{'a': 0.40312876052948254,
'f1': 0.36,
'p': 0.23153526970954358,
'r': 0.808695652173913},
{'a': 0.5251872021783526,
'f1': 0.5427728613569321,
'p': 0.39372325249643364,
'r': 0.8734177215189873},
{'a': 0.29772374547335745,
'f1': 0.3710910354412787,
'p': 0.235657546337158,
'r': 0.8725490196078431},
{'a': 0.2304075235109718,
'f1': 0.24980901451489684,
'p': 0.14617791685292802,
'r': 0.8582677165354331},
{'a': 0.6290977208866687,
'f1': 0.7113702623906706,
'p': 0.5729941291585127,
'r': 0.9378603459320948},
{'a': 0.4444444444444444,
'f1': 0.4135188866799205,
'p': 0.27030539311241064,
'r': 0.879492600422833},
{'a': 0.5819144911085887,
'f1': 0.6747129820429792,
'p': 0.526896551724138,
'r': 0.9378068739770867},
{'a': 0.4675090252707581,
'f1': 0.4082246740220662,
'p': 0.27352150537634407,
'r': 0.8043478260869565}],
'www.esquire.com;2000': [{'a': 0.9610738255033557,
'f1': 0.9452830188679244,
'p': 0.9488636363636364,
'r': 0.9417293233082706},
{'a': 0.9625829812914907,
'f1': 0.9585006693440428,
'p': 0.9636608344549125,
'r': 0.9533954727030626},
{'a': 0.9491106719367589,
'f1': 0.9352608422375865,
'p': 0.9649805447470817,
'r': 0.9073170731707317},
{'a': 0.9604743083003953,
'f1': 0.9330357142857143,
'p': 0.9393258426966292,
'r': 0.926829268292683},
{'a': 0.9659798754192621,
'f1': 0.9649382716049383,
'p': 0.9731075697211156,
'r': 0.9569049951028403},
{'a': 0.9618320610687023,
'f1': 0.9403578528827038,
'p': 0.946,
'r': 0.9347826086956522},
{'a': 0.8702734147760326,
'f1': 0.7635206786850478,
'p': 0.7003891050583657,
'r': 0.8391608391608392},
{'a': 0.9437291368621841,
'f1': 0.9423264907135875,
'p': 0.9698189134808853,
'r': 0.9163498098859315},
{'a': 0.9411764705882353,
'f1': 0.8988988988988988,
'p': 0.9432773109243697,
'r': 0.858508604206501},
{'a': 0.954456415279138,
'f1': 0.9556931872320154,
'p': 0.9737864077669903,
'r': 0.9382600561272217},
{'a': 0.9400584795321637,
'f1': 0.9076576576576577,
'p': 0.9372093023255814,
'r': 0.8799126637554585},
{'a': 0.9430379746835443,
'f1': 0.9165964616680706,
'p': 0.9527145359019265,
'r': 0.8831168831168831},
{'a': 0.9679519278918377,
'f1': 0.96529284164859,
'p': 0.9705561613958561,
'r': 0.9600862998921251},
{'a': 0.9479048697621744,
'f1': 0.9159049360146252,
'p': 0.9488636363636364,
'r': 0.8851590106007067},
{'a': 0.9504480759093306,
'f1': 0.9304733727810651,
'p': 0.9588414634146342,
'r': 0.9037356321839081},
{'a': 0.9373088685015291,
'f1': 0.8918205804749341,
'p': 0.9234972677595629,
'r': 0.8622448979591837},
{'a': 0.9499749874937469,
'f1': 0.9528746465598492,
'p': 0.9674641148325359,
'r': 0.9387186629526463},
{'a': 0.950109649122807,
'f1': 0.9334308705193854,
'p': 0.9579579579579579,
'r': 0.9101283880171184},
{'a': 0.9045736871823828,
'f1': 0.842203548085901,
'p': 0.803921568627451,
'r': 0.884313725490196},
{'a': 0.8260325406758448,
'f1': 0.576219512195122,
'p': 0.4833759590792839,
'r': 0.7132075471698113},
{'a': 0.9553314121037464,
'f1': 0.9345991561181434,
'p': 0.9425531914893617,
'r': 0.9267782426778243},
{'a': 0.9464387464387465,
'f1': 0.9426829268292682,
'p': 0.9650436953807741,
'r': 0.9213349225268176},
{'a': 0.943345804382683,
'f1': 0.9206586826347306,
'p': 0.9564541213063764,
'r': 0.8874458874458875},
{'a': 0.1,
'f1': 0.09999999999999999,
'p': 0.05555555555555555,
'r': 0.5},
{'a': 0.9533295389869095,
'f1': 0.9243542435424354,
'p': 0.9488636363636364,
'r': 0.9010791366906474}],
'www.esquire.com;2005': [{'a': 0.9530398322851154,
'f1': 0.9464114832535886,
'p': 0.9611273080660836,
'r': 0.9321394910461829},
{'a': 0.9465422146796776,
'f1': 0.9371884346959123,
'p': 0.9572301425661914,
'r': 0.91796875},
{'a': 0.9585714285714285,
'f1': 0.9390329362298528,
'p': 0.938375350140056,
'r': 0.9396914446002805},
{'a': 0.9636363636363636,
'f1': 0.9542857142857142,
'p': 0.9553775743707094,
'r': 0.9531963470319634},
{'a': 0.9630901287553648,
'f1': 0.9570858283433133,
'p': 0.9609218436873748,
'r': 0.9532803180914513},
{'a': 0.9211159211159211,
'f1': 0.8790560471976402,
'p': 0.8989441930618401,
'r': 0.86002886002886},
{'a': 0.9058993847267462,
'f1': 0.9183417085427136,
'p': 0.8702380952380953,
'r': 0.9720744680851063},
{'a': 0.9181996086105675,
'f1': 0.9040844424047728,
'p': 0.8747779751332149,
'r': 0.9354226020892688},
{'a': 0.9591222030981067,
'f1': 0.9522373051784816,
'p': 0.958502024291498,
'r': 0.9460539460539461},
{'a': 0.9603463992707384,
'f1': 0.9502572898799314,
'p': 0.9432463110102156,
'r': 0.9573732718894009},
{'a': 0.9429404414827155,
'f1': 0.9302798982188295,
'p': 0.9185929648241206,
'r': 0.9422680412371134},
{'a': 0.920041004613019,
'f1': 0.8664383561643836,
'p': 0.840531561461794,
'r': 0.8939929328621908},
{'a': 0.9002638522427441,
'f1': 0.815968841285297,
'p': 0.7688073394495413,
'r': 0.8692946058091287},
{'a': 0.9523595505617978,
'f1': 0.9440928270042194,
'p': 0.9582441113490364,
'r': 0.9303534303534303},
{'a': 0.9564459930313589,
'f1': 0.9479166666666666,
'p': 0.9479166666666666,
'r': 0.9479166666666666},
{'a': 0.8827899298390425,
'f1': 0.8565656565656565,
'p': 0.7969924812030075,
'r': 0.925764192139738},
{'a': 0.9586449626044875,
'f1': 0.9499467518636849,
'p': 0.958109559613319,
'r': 0.941921858500528},
{'a': 0.9640317858636553,
'f1': 0.9595484477892757,
'p': 0.9622641509433962,
'r': 0.9568480300187617},
{'a': 0.9663256606990622,
'f1': 0.9613313754282917,
'p': 0.958984375,
'r': 0.9636898920510304},
{'a': 0.9699303263659699,
'f1': 0.9707142857142858,
'p': 0.9714081486776269,
'r': 0.9700214132762313},
{'a': 0.9461196243203164,
'f1': 0.920611798980335,
'p': 0.9390787518573551,
'r': 0.9028571428571428},
{'a': 0.948937908496732,
'f1': 0.943155979990905,
'p': 0.9308797127468582,
'r': 0.9557603686635945},
{'a': 0.9541052631578948,
'f1': 0.9472665699080792,
'p': 0.9616895874263262,
'r': 0.9332697807435653},
{'a': 0.9571852479864349,
'f1': 0.9505628976994616,
'p': 0.960435212660732,
'r': 0.9408914728682171},
{'a': 0.9658650116369278,
'f1': 0.9645732689210951,
'p': 0.9684721099434115,
'r': 0.9607056936647955}],
'www.esquire.com;2010': [{'a': 0.5643207012116525,
'f1': 0.4433465085638999,
'p': 0.30660592255125285,
'r': 0.8002378121284186},
{'a': 0.6595404595404596,
'f1': 0.711284310403253,
'p': 0.5828936406553735,
'r': 0.9122120817036071},
{'a': 0.4677680596047943,
'f1': 0.20742884708152434,
'p': 0.12092238470191226,
'r': 0.7288135593220338},
{'a': 0.5635330578512396,
'f1': 0.5412595005428882,
'p': 0.3940711462450593,
'r': 0.8639514731369151},
{'a': 0.5767780849459663,
'f1': 0.5699693564862105,
'p': 0.426279602750191,
'r': 0.8597842835130971},
{'a': 0.530896150113232,
'f1': 0.4304791830322074,
'p': 0.2860125260960334,
'r': 0.8698412698412699},
{'a': 0.4075716234652115,
'f1': 0.286652977412731,
'p': 0.1713303878252332,
'r': 0.8768844221105527},
{'a': 0.5689839572192513,
'f1': 0.5470213563132258,
'p': 0.3924731182795699,
'r': 0.9023485784919654},
{'a': 0.5669208519589798,
'f1': 0.5385261978145138,
'p': 0.3895419537900284,
'r': 0.8720508166969148},
{'a': 0.5872011251758087,
'f1': 0.6014935505770536,
'p': 0.4532742155525239,
'r': 0.8937457969065232},
{'a': 0.5289658906334597,
'f1': 0.45861854387056633,
'p': 0.3207136640557006,
'r': 0.8045851528384279},
{'a': 0.5566271700192891,
'f1': 0.5137503777576308,
'p': 0.3648068669527897,
'r': 0.8682328907048008},
{'a': 0.6042534531900899,
'f1': 0.6395046934291992,
'p': 0.49050245098039214,
'r': 0.9185312679288583},
{'a': 0.5003152585119798,
'f1': 0.31355565179731487,
'p': 0.19738276990185388,
'r': 0.7621052631578947},
{'a': 0.5922096657850445,
'f1': 0.5956127801621364,
'p': 0.45237232886635276,
'r': 0.8715980460572226},
{'a': 0.49784791965566716,
'f1': 0.2143658810325477,
'p': 0.12402597402597403,
'r': 0.7892561983471075},
{'a': 0.3755117231112765,
'f1': 0.17583497053045186,
'p': 0.0982436882546652,
'r': 0.8364485981308412},
{'a': 0.5544525547445256,
'f1': 0.5058290155440415,
'p': 0.35164340387212967,
'r': 0.9008073817762399},
{'a': 0.568724279835391,
'f1': 0.5549263873159682,
'p': 0.40312628547922663,
'r': 0.8900999091734787},
{'a': 0.6699975018735949,
'f1': 0.681763430498675,
'p': 0.5465430668211665,
'r': 0.9058898847631242},
{'a': 0.40389294403892945,
'f1': 0.26109435588108576,
'p': 0.15380710659898478,
'r': 0.8632478632478633},
{'a': 0.391304347826087,
'f1': 0.2546583850931677,
'p': 0.1490134994807892,
'r': 0.875},
{'a': 0.6918226600985221,
'f1': 0.7528445006321113,
'p': 0.629492600422833,
'r': 0.9363207547169812},
{'a': 0.5951573849878935,
'f1': 0.6032273374466066,
'p': 0.45785302593659943,
'r': 0.8838664812239221},
{'a': 0.630575117370892,
'f1': 0.6488145048814504,
'p': 0.5043365134431916,
'r': 0.9093041438623924}],
'www.esquire.com;2015': [{'a': 0.28155849110591824,
'f1': 0.01374795417348609,
'p': 0.006949500297835727,
'r': 0.6325301204819277},
{'a': 0.28711102754536055,
'f1': 0.03327383987761346,
'p': 0.017044341409260106,
'r': 0.696},
{'a': 0.27682545695615113,
'f1': 0.014328127016909773,
'p': 0.007239287810604579,
'r': 0.6894409937888198},
{'a': 0.46926977687626775,
'f1': 0.08112379280070238,
'p': 0.04307290695506247,
'r': 0.6957831325301205},
{'a': 0.3115534984047095,
'f1': 0.13151927437641722,
'p': 0.07138900855437258,
'r': 0.8339324227174695},
{'a': 0.3296091814111203,
'f1': 0.1882402484602832,
'p': 0.10571124512238382,
'r': 0.8583773403744599},
{'a': 0.2885415703320078,
'f1': 0.06056905605079986,
'p': 0.03155015584250366,
'r': 0.7549467275494672},
{'a': 0.28600444003589814,
'f1': 0.03177043300025621,
'p': 0.016255899318300997,
'r': 0.6966292134831461},
{'a': 0.3803981623277182,
'f1': 0.17495921696574226,
'p': 0.09766647694934548,
'r': 0.8387096774193549},
{'a': 0.480719397828233,
'f1': 0.5357822453876065,
'p': 0.37498552403011004,
'r': 0.9380069524913094},
{'a': 0.2905982905982906,
'f1': 0.05122118808170405,
'p': 0.026523482986156036,
'r': 0.7441016333938294},
{'a': 0.42642440556303274,
'f1': 0.22177133001927563,
'p': 0.12893712398254098,
'r': 0.7920289855072464},
{'a': 0.28823722302899707,
'f1': 0.04500314267756129,
'p': 0.023218107529671184,
'r': 0.7291242362525459},
{'a': 0.3103961736305388,
'f1': 0.11633420063602197,
'p': 0.06269084564092976,
'r': 0.8060897435897436},
{'a': 0.30111370823594114,
'f1': 0.08463893390959842,
'p': 0.04466265441875198,
'r': 0.8066361556064073},
{'a': 0.36065963688258146,
'f1': 0.0967741935483871,
'p': 0.05160673754629076,
'r': 0.7755834829443446},
{'a': 0.289927787677014,
'f1': 0.05231866825208085,
'p': 0.02706727967363854,
'r': 0.7798507462686567},
{'a': 0.28603752239087393,
'f1': 0.024475074069303104,
'p': 0.012450851900393184,
'r': 0.7142857142857143},
{'a': 0.3594932674687276,
'f1': 0.09215132693393563,
'p': 0.04895608351331893,
'r': 0.783109404990403},
{'a': 0.320497058048652,
'f1': 0.1641911963273022,
'p': 0.090838462917588,
'r': 0.8529741863075196},
{'a': 0.28706446607419944,
'f1': 0.023192887514495556,
'p': 0.011798636601992658,
'r': 0.6766917293233082},
{'a': 0.4193067197045035,
'f1': 0.3059682485779777,
'p': 0.18523848684210525,
'r': 0.878595806923452},
{'a': 0.33102908569192646,
'f1': 0.18479470198675496,
'p': 0.10366759793140344,
'r': 0.8499025341130604},
{'a': 0.32755466504050873,
'f1': 0.1720476241553148,
'p': 0.09607666966157533,
'r': 0.822142491030241},
{'a': 0.35135792460478316,
'f1': 0.058816609810610515,
'p': 0.03058103975535168,
'r': 0.7668711656441718},
{'a': 0.28490255928621744,
'f1': 0.034369055168040584,
'p': 0.01760998115537072,
'r': 0.7112860892388452}],
'www.forbes.com;2000': [{'a': 0.7603195739014648,
'f1': 0.7727272727272727,
'p': 0.6777408637873754,
'r': 0.8986784140969163},
{'a': 0.741304347826087,
'f1': 0.75564681724846,
'p': 0.6216216216216216,
'r': 0.9633507853403142},
{'a': 0.8787784356497351,
'f1': 0.9231073334651118,
'p': 0.8801356954391255,
'r': 0.9704904405652536},
{'a': 0.7747368421052632,
'f1': 0.7995003123048094,
'p': 0.6837606837606838,
'r': 0.9624060150375939},
{'a': 0.6669542709232097,
'f1': 0.5150753768844221,
'p': 0.36541889483065954,
'r': 0.8723404255319149},
{'a': 0.7552631578947369,
'f1': 0.757496740547588,
'p': 0.6579841449603624,
'r': 0.8924731182795699},
{'a': 0.8159443552701979,
'f1': 0.8610662358642972,
'p': 0.7781021897810219,
'r': 0.9638336347197106},
{'a': 0.717391304347826,
'f1': 0.6672550750220653,
'p': 0.5550660792951542,
'r': 0.8362831858407079},
{'a': 0.8219106957424714,
'f1': 0.8670027142303218,
'p': 0.7829131652661064,
'r': 0.9713292788879235},
{'a': 0.9711538461538461,
'f1': 0.9811202013845186,
'p': 0.9755944931163955,
'r': 0.9867088607594937},
{'a': 0.711376404494382,
'f1': 0.6888720666161998,
'p': 0.5963302752293578,
'r': 0.8154121863799283},
{'a': 0.880854252529037,
'f1': 0.9228155339805825,
'p': 0.8740229885057471,
'r': 0.9773778920308483},
{'a': 0.6704361873990307,
'f1': 0.5903614457831325,
'p': 0.49830508474576274,
'r': 0.7241379310344828},
{'a': 0.7071742313323572,
'f1': 0.661590524534687,
'p': 0.5634005763688761,
'r': 0.8012295081967213},
{'a': 0.6441837732160313,
'f1': 0.4468085106382979,
'p': 0.32450331125827814,
'r': 0.7170731707317073},
{'a': 0.7682789651293588,
'f1': 0.8140794223826715,
'p': 0.714172604908947,
'r': 0.9464847848898216},
{'a': 0.8058455114822547,
'f1': 0.8495145631067961,
'p': 0.7658643326039387,
'r': 0.9536784741144414},
{'a': 0.7821052631578947,
'f1': 0.8318440292445167,
'p': 0.7361610352264558,
'r': 0.9561157796451915},
{'a': 0.7606382978723404,
'f1': 0.7844598190526876,
'p': 0.7012369172216937,
'r': 0.8900966183574879},
{'a': 0.6845637583892618,
'f1': 0.36199095022624433,
'p': 0.23904382470119523,
'r': 0.7453416149068323},
{'a': 0.9127272727272727,
'f1': 0.84,
'p': 0.7777777777777778,
'r': 0.9130434782608695},
{'a': 0.7254335260115607,
'f1': 0.6900489396411094,
'p': 0.5834482758620689,
'r': 0.844311377245509},
{'a': 0.8086194302410519,
'f1': 0.8335451080050825,
'p': 0.7446083995459705,
'r': 0.9466089466089466},
{'a': 0.7429359062715368,
'f1': 0.7534699272967614,
'p': 0.641169853768279,
'r': 0.9134615384615384},
{'a': 0.8815298507462687,
'f1': 0.9247778874629812,
'p': 0.8794592564776568,
'r': 0.9750208159866778},
{'a': 0.7494922139471902,
'f1': 0.7787081339712919,
'p': 0.6636085626911316,
'r': 0.9421128798842258},
{'a': 0.733142037302726,
'f1': 0.7138461538461538,
'p': 0.5895806861499364,
'r': 0.9044834307992202},
{'a': 0.9517426273458445,
'f1': 0.9647058823529412,
'p': 0.9669811320754716,
'r': 0.9624413145539906}],
'www.forbes.com;2005': [{'a': 0.922463768115942,
'f1': 0.5868725868725868,
'p': 0.4935064935064935,
'r': 0.7238095238095238},
{'a': 0.893611404435058,
'f1': 0.8388644542183126,
'p': 0.8026013771996939,
'r': 0.8785594639865997},
{'a': 0.923974540311174,
'f1': 0.7174770039421814,
'p': 0.6807980049875312,
'r': 0.7583333333333333},
{'a': 0.8789716926632004,
'f1': 0.7067879636109169,
'p': 0.6242274412855378,
'r': 0.8145161290322581},
{'a': 0.9320754716981132,
'f1': 0.8411764705882353,
'p': 0.8125,
'r': 0.8719512195121951},
{'a': 0.9328429804924848,
'f1': 0.8467153284671532,
'p': 0.8215297450424929,
'r': 0.8734939759036144},
{'a': 0.9169241331960178,
'f1': 0.6620111731843575,
'p': 0.5895522388059702,
'r': 0.7547770700636943},
{'a': 0.9158091674462114,
'f1': 0.7926267281105991,
'p': 0.7510917030567685,
'r': 0.8390243902439024},
{'a': 0.9332394366197183,
'f1': 0.8892005610098177,
'p': 0.879740980573543,
'r': 0.8988657844990549},
{'a': 0.8787878787878788,
'f1': 0.0125,
'p': 0.0078125,
'r': 0.03125},
{'a': 0.9221065909807632,
'f1': 0.6535764375876578,
'p': 0.6348773841961853,
'r': 0.6734104046242775},
{'a': 0.9324227174694465,
'f1': 0.7062499999999999,
'p': 0.6420454545454546,
'r': 0.7847222222222222},
{'a': 0.9324990519529769,
'f1': 0.5972850678733032,
'p': 0.5116279069767442,
'r': 0.717391304347826},
{'a': 0.897003745318352,
'f1': 0.7887323943661972,
'p': 0.7368421052631579,
'r': 0.8484848484848485},
{'a': 0.923697270471464,
'f1': 0.8282122905027932,
'p': 0.8179310344827586,
'r': 0.8387553041018387},
{'a': 0.9231056364315043,
'f1': 0.6199261992619928,
'p': 0.56,
'r': 0.6942148760330579},
{'a': 0.9271175311884439,
'f1': 0.8388969521044993,
'p': 0.8210227272727273,
'r': 0.857566765578635},
{'a': 0.9274785801713586,
'f1': 0.8657223796033995,
'p': 0.854586129753915,
'r': 0.8771526980482205},
{'a': 0.9183731513083049,
'f1': 0.8444444444444444,
'p': 0.8358369098712446,
'r': 0.8532311062431545},
{'a': 0.9288114879315612,
'f1': 0.8535512256442489,
'p': 0.8382716049382716,
'r': 0.8693982074263764},
{'a': 0.8757146408153119,
'f1': 0.7795414462081128,
'p': 0.7163695299837926,
'r': 0.8549323017408124},
{'a': 0.918732350172576,
'f1': 0.778063410454156,
'p': 0.7566666666666667,
'r': 0.800705467372134},
{'a': 0.918977202711029,
'f1': 0.8101083032490974,
'p': 0.7912552891396333,
'r': 0.8298816568047337},
{'a': 0.926164136866478,
'f1': 0.8929503916449086,
'p': 0.9047619047619048,
'r': 0.8814432989690721},
{'a': 0.9195926025194318,
'f1': 0.8584905660377359,
'p': 0.8363970588235294,
'r': 0.8817829457364341}],
'www.forbes.com;2010': [{'a': 0.803030303030303,
'f1': 0.4956896551724137,
'p': 0.4121863799283154,
'r': 0.6216216216216216},
{'a': 0.8355041003376749,
'f1': 0.8311045071817731,
'p': 0.8121974830590513,
'r': 0.8509127789046653},
{'a': 0.834983498349835,
'f1': 0.7706422018348623,
'p': 0.6666666666666666,
'r': 0.9130434782608695},
{'a': 0.8385175380542687,
'f1': 0.7881944444444444,
'p': 0.6941896024464832,
'r': 0.9116465863453815},
{'a': 0.8763654419066534,
'f1': 0.8819345661450925,
'p': 0.8355795148247979,
'r': 0.9337349397590361},
{'a': 0.8413705583756346,
'f1': 0.7990353697749196,
'p': 0.7213352685050798,
'r': 0.8954954954954955},
{'a': 0.8154583582983823,
'f1': 0.7220216606498195,
'p': 0.6279434850863422,
'r': 0.8492569002123143},
{'a': 0.8404907975460123,
'f1': 0.8281249999999999,
'p': 0.7291005291005291,
'r': 0.9582753824756607},
{'a': 0.8345111896348646,
'f1': 0.8068728522336769,
'p': 0.7264851485148515,
'r': 0.9072642967542504},
{'a': 0.8261648745519713,
'f1': 0.742249778565102,
'p': 0.6506211180124224,
'r': 0.8639175257731959},
{'a': 0.8678511937812327,
'f1': 0.8598351001177856,
'p': 0.7883369330453563,
'r': 0.9455958549222798},
{'a': 0.8579756226815051,
'f1': 0.8538713195201745,
'p': 0.7997957099080695,
'r': 0.9157894736842105},
{'a': 0.8235294117647058,
'f1': 0.7715481171548119,
'p': 0.6749633967789166,
'r': 0.900390625},
{'a': 0.8467322151532678,
'f1': 0.8222669349429913,
'p': 0.7512254901960784,
'r': 0.9081481481481481},
{'a': 0.822998193859121,
'f1': 0.7762557077625571,
'p': 0.6737120211360634,
'r': 0.9156193895870736},
{'a': 0.8538390379278445,
'f1': 0.8438735177865613,
'p': 0.7870967741935484,
'r': 0.9094781682641108},
{'a': 0.7989521938441388,
'f1': 0.7039537126325941,
'p': 0.6684981684981685,
'r': 0.7433808553971487},
{'a': 0.8128772635814889,
'f1': 0.7360454115421002,
'p': 0.7021660649819494,
'r': 0.7733598409542743},
{'a': 0.8265830005704506,
'f1': 0.7342657342657343,
'p': 0.6552262090483619,
'r': 0.8349900596421471},
{'a': 0.8058429701765064,
'f1': 0.7498039215686273,
'p': 0.6297760210803689,
'r': 0.9263565891472868},
{'a': 0.8155028827674567,
'f1': 0.7587939698492463,
'p': 0.7365853658536585,
'r': 0.7823834196891192},
{'a': 0.5447540011855364,
'f1': 0.1812366737739872,
'p': 0.10278113663845223,
'r': 0.7657657657657657},
{'a': 0.5952802359882006,
'f1': 0.3466666666666667,
'p': 0.21487603305785125,
'r': 0.896551724137931},
{'a': 0.818087318087318,
'f1': 0.8064159292035399,
'p': 0.7098344693281402,
'r': 0.9334186939820742},
{'a': 0.8540609137055838,
'f1': 0.8275862068965517,
'p': 0.739946380697051,
'r': 0.9387755102040817}],
'www.forbes.com;2015': [{'a': 0.6009918845807033,
'f1': 0.5982750794371311,
'p': 0.45354439091534754,
'r': 0.8786666666666667},
{'a': 0.5228988424760946,
'f1': 0.4397163120567376,
'p': 0.30194805194805197,
'r': 0.808695652173913},
{'a': 0.5684647302904564,
'f1': 0.4474616292798111,
'p': 0.31530782029950083,
'r': 0.7703252032520326},
{'a': 0.5637982195845698,
'f1': 0.486013986013986,
'p': 0.347789824854045,
'r': 0.8065764023210832},
{'a': 0.5737037037037037,
'f1': 0.5218113834648941,
'p': 0.38456827924066134,
'r': 0.8113695090439277},
{'a': 0.5261813537675607,
'f1': 0.5360566902876198,
'p': 0.3793510324483776,
'r': 0.9133522727272727},
{'a': 0.6267262388302194,
'f1': 0.6455842653297338,
'p': 0.5097442143727162,
'r': 0.8801261829652997},
{'a': 0.6165389527458492,
'f1': 0.5712245626561941,
'p': 0.45454545454545453,
'r': 0.7684918347742555},
{'a': 0.43635551585529253,
'f1': 0.3984747378455672,
'p': 0.2570725707257073,
'r': 0.885593220338983},
{'a': 0.3153623188405797,
'f1': 0.2385557704706641,
'p': 0.13941220798794274,
'r': 0.8258928571428571},
{'a': 0.5164212910532276,
'f1': 0.3200636942675159,
'p': 0.20447609359104782,
'r': 0.7362637362637363},
{'a': 0.56483191725157,
'f1': 0.602832097100472,
'p': 0.4554253693326541,
'r': 0.8913260219341974},
{'a': 0.608122179798681,
'f1': 0.6417010472865756,
'p': 0.5017369727047146,
'r': 0.8899647887323944},
{'a': 0.5861520095503382,
'f1': 0.607250755287009,
'p': 0.4612736660929432,
'r': 0.8883977900552487},
{'a': 0.5475171232876712,
'f1': 0.5086006508600651,
'p': 0.36321381142098275,
'r': 0.8480620155038759},
{'a': 0.5645315487571702,
'f1': 0.48791455874086564,
'p': 0.3472,
'r': 0.8204158790170132},
{'a': 0.5972944849115505,
'f1': 0.5044814340588989,
'p': 0.3835171966255678,
'r': 0.7369077306733167},
{'a': 0.5224932249322494,
'f1': 0.36297903109182933,
'p': 0.23882017126546146,
'r': 0.7560240963855421},
{'a': 0.49003466204506063,
'f1': 0.46862302483069984,
'p': 0.31860036832412525,
'r': 0.8856655290102389},
{'a': 0.5722679200940071,
'f1': 0.5997067448680351,
'p': 0.4487109160724081,
'r': 0.9038674033149171},
{'a': 0.41139240506329117,
'f1': 0.26235509456985967,
'p': 0.15774027879677183,
'r': 0.7789855072463768},
{'a': 0.5893101873001371,
'f1': 0.5466464952092789,
'p': 0.40813253012048195,
'r': 0.8274809160305343},
{'a': 0.36214185063410054,
'f1': 0.09345794392523364,
'p': 0.050468637346791634,
'r': 0.6306306306306306},
{'a': 0.5615592435353145,
'f1': 0.5968772178850248,
'p': 0.44263157894736843,
'r': 0.9161220043572985},
{'a': 0.5688073394495413,
'f1': 0.531405782652044,
'p': 0.3867924528301887,
'r': 0.8487261146496815}],
'www.foxnews.com;2000': [{'a': 0.9518828451882845,
'f1': 0.9187279151943463,
'p': 0.9285714285714286,
'r': 0.9090909090909091},
{'a': 0.960635359116022,
'f1': 0.8800000000000001,
'p': 0.9330357142857143,
'r': 0.8326693227091634},
{'a': 0.9295958279009127,
'f1': 0.9021739130434783,
'p': 0.8691099476439791,
'r': 0.9378531073446328},
{'a': 0.9138438880706922,
'f1': 0.8956289027653881,
'p': 0.899641577060932,
'r': 0.8916518650088809},
{'a': 0.9568106312292359,
'f1': 0.8987012987012987,
'p': 0.9301075268817204,
'r': 0.8693467336683417},
{'a': 0.9374437443744374,
'f1': 0.9506567270145545,
'p': 0.926002766251729,
'r': 0.9766593727206418},
{'a': 0.7709205020920502,
'f1': 0.7818725099601594,
'p': 0.6618887015177066,
'r': 0.9549878345498783},
{'a': 0.9540372670807453,
'f1': 0.9357638888888891,
'p': 0.9373913043478261,
'r': 0.9341421143847487},
{'a': 0.9479315263908702,
'f1': 0.9557575757575759,
'p': 0.9651162790697675,
'r': 0.946578631452581},
{'a': 0.9376609994848016,
'f1': 0.8826382153249273,
'p': 0.9191919191919192,
'r': 0.8488805970149254},
{'a': 0.966804979253112,
'f1': 0.9065420560747663,
'p': 0.9326923076923077,
'r': 0.8818181818181818},
{'a': 0.8981636060100167,
'f1': 0.8539505187549881,
'p': 0.816793893129771,
'r': 0.8946488294314381},
{'a': 0.9306184012066365,
'f1': 0.8696883852691218,
'p': 0.8319783197831978,
'r': 0.9109792284866469},
{'a': 0.9382022471910112,
'f1': 0.9022222222222221,
'p': 0.8638297872340426,
'r': 0.9441860465116279},
{'a': 0.2631578947368421,
'f1': 0.3,
'p': 0.17647058823529413,
'r': 1.0},
{'a': 0.9135060129509713,
'f1': 0.9034589571502323,
'p': 0.8901322482197355,
'r': 0.9171907756813418},
{'a': 0.9339651482726995,
'f1': 0.9274680993955676,
'p': 0.933739012846518,
'r': 0.9212808539026017},
{'a': 0.9387078961899503,
'f1': 0.9285254346426272,
'p': 0.9327296248382924,
'r': 0.9243589743589744},
{'a': 0.933705512909979,
'f1': 0.8966267682263331,
'p': 0.865546218487395,
'r': 0.9300225733634312},
{'a': 0.9861636951882701,
'f1': 0.9925436806766332,
'p': 0.9959797498511018,
'r': 0.9891312384473198},
{'a': 0.9393139841688655,
'f1': 0.936986301369863,
'p': 0.9173819742489271,
'r': 0.9574468085106383},
{'a': 0.8996683250414593,
'f1': 0.9202373104812129,
'p': 0.8914431673052363,
'r': 0.9509536784741145},
{'a': 0.9659790083242852,
'f1': 0.9745533297238765,
'p': 0.9787928221859706,
'r': 0.9703504043126685},
{'a': 0.949358059914408,
'f1': 0.9095541401273886,
'p': 0.9037974683544304,
'r': 0.9153846153846154},
{'a': 0.9705240174672489,
'f1': 0.9608695652173913,
'p': 0.9822222222222222,
'r': 0.9404255319148936}],
'www.foxnews.com;2005': [{'a': 0.6034149484536082,
'f1': 0.6691749529696318,
'p': 0.5075417855686915,
'r': 0.9818611987381703},
{'a': 0.442833607907743,
'f1': 0.45749117741418033,
'p': 0.30135249366018596,
'r': 0.9494007989347537},
{'a': 0.458528951486698,
'f1': 0.4917743830787309,
'p': 0.33466613354658137,
'r': 0.9269102990033222},
{'a': 0.4705693148922483,
'f1': 0.5036188178528348,
'p': 0.3471933471933472,
'r': 0.9165751920965971},
{'a': 0.4147383410466358,
'f1': 0.3946980854197349,
'p': 0.24907063197026022,
'r': 0.950354609929078},
{'a': 0.3608128834355828,
'f1': 0.2672527472527472,
'p': 0.15565796210957503,
'r': 0.9440993788819876},
{'a': 0.551033386327504,
'f1': 0.6097291321171918,
'p': 0.44565656565656564,
'r': 0.9650043744531933},
{'a': 0.599594868332208,
'f1': 0.3762272089761571,
'p': 0.24183006535947713,
'r': 0.8468823993685872},
{'a': 0.7557954127315099,
'f1': 0.8411140371877743,
'p': 0.7352120535714286,
'r': 0.9826589595375722},
{'a': 0.5216294160057678,
'f1': 0.30194634402945814,
'p': 0.1810725552050473,
'r': 0.9082278481012658},
{'a': 0.5412064570943076,
'f1': 0.55,
'p': 0.38573933372296904,
'r': 0.9579100145137881},
{'a': 0.45275779376498804,
'f1': 0.3491158014831717,
'p': 0.21549295774647886,
'r': 0.918918918918919},
{'a': 0.46584641493423845,
'f1': 0.4211494252873563,
'p': 0.27213309566250743,
'r': 0.9308943089430894},
{'a': 0.3951965065502183,
'f1': 0.29228410832907514,
'p': 0.17470983506414173,
'r': 0.89375},
{'a': 0.47703180212014135,
'f1': 0.5151876116736153,
'p': 0.35262943334692215,
'r': 0.9558011049723757},
{'a': 0.456710653363373,
'f1': 0.48661800486618007,
'p': 0.3270645952575634,
'r': 0.9501187648456056},
{'a': 0.46838258659040355,
'f1': 0.5013412816691505,
'p': 0.33966074313408723,
'r': 0.9567690557451649},
{'a': 0.6817651632970451,
'f1': 0.7405294024409574,
'p': 0.5974424552429668,
'r': 0.9737390579408086},
{'a': 0.39481946624803765,
'f1': 0.3248686514886165,
'p': 0.19619249074563722,
'r': 0.9440203562340967},
{'a': 0.40449775112443775,
'f1': 0.41895845523698066,
'p': 0.26957831325301207,
'r': 0.9396325459317585},
{'a': 0.6685860524632118,
'f1': 0.7033218785796106,
'p': 0.5576748410535877,
'r': 0.951937984496124},
{'a': 0.46245186136071886,
'f1': 0.491962390051562,
'p': 0.33183306055646483,
'r': 0.9507620164126612},
{'a': 0.401333737496211,
'f1': 0.408859622867405,
'p': 0.2614854517611026,
'r': 0.9368998628257887},
{'a': 0.5186202686202687,
'f1': 0.5137218624730188,
'p': 0.35192226446979297,
'r': 0.9509132420091324},
{'a': 0.46610716591349255,
'f1': 0.4969586374695864,
'p': 0.3362139917695473,
'r': 0.9522144522144522}],
'www.foxnews.com;2010': [{'a': 0.7668161434977578,
'f1': 0.3764988009592326,
'p': 0.24881141045958796,
'r': 0.7733990147783252},
{'a': 0.7757424368581738,
'f1': 0.6937073540561032,
'p': 0.5988219895287958,
'r': 0.8243243243243243},
{'a': 0.2345960748516659,
'f1': 0.17103311913000496,
'p': 0.09495060373216246,
'r': 0.8606965174129353},
{'a': 0.7676311030741411,
'f1': 0.32189973614775724,
'p': 0.20854700854700856,
'r': 0.7052023121387283},
{'a': 0.7303617099013519,
'f1': 0.5472392638036809,
'p': 0.40879926672777267,
'r': 0.8274582560296846},
{'a': 0.7756373937677054,
'f1': 0.6793522267206478,
'p': 0.5883590462833099,
'r': 0.803639846743295},
{'a': 0.7619047619047619,
'f1': 0.5780474351902922,
'p': 0.45565217391304347,
'r': 0.7903469079939668},
{'a': 0.7622270070747462,
'f1': 0.6082108464267614,
'p': 0.5050505050505051,
'r': 0.7643312101910829},
{'a': 0.7117411850236278,
'f1': 0.507147296457427,
'p': 0.36298932384341637,
'r': 0.8412371134020619},
{'a': 0.7625284738041003,
'f1': 0.6472081218274112,
'p': 0.5379746835443038,
'r': 0.8121019108280255},
{'a': 0.7522093813732155,
'f1': 0.4774193548387096,
'p': 0.3501577287066246,
'r': 0.75},
{'a': 0.760662671836447,
'f1': 0.4270042194092827,
'p': 0.3092909535452323,
'r': 0.6893732970027248},
{'a': 0.749707145646232,
'f1': 0.4784377542717656,
'p': 0.35336538461538464,
'r': 0.7405541561712846},
{'a': 0.8027233477250083,
'f1': 0.7522935779816513,
'p': 0.6424501424501424,
'r': 0.9074446680080482},
{'a': 0.7579972183588317,
'f1': 0.6481294236602629,
'p': 0.5136217948717948,
'r': 0.8780821917808219},
{'a': 0.7645959831854273,
'f1': 0.3471502590673575,
'p': 0.22521008403361345,
'r': 0.7570621468926554},
{'a': 0.7599640395564878,
'f1': 0.607545320921117,
'p': 0.5,
'r': 0.7740324594257179},
{'a': 0.811549368393916,
'f1': 0.8121305576972501,
'p': 0.7301293900184843,
'r': 0.9148812970469021},
{'a': 0.7687253613666228,
'f1': 0.38028169014084506,
'p': 0.2583732057416268,
'r': 0.72},
{'a': 0.7672496025437202,
'f1': 0.7031630170316301,
'p': 0.5776149233844103,
'r': 0.8984455958549222},
{'a': 0.7780074410913601,
'f1': 0.5251989389920424,
'p': 0.3907894736842105,
'r': 0.8005390835579514},
{'a': 0.7532252729077076,
'f1': 0.5155844155844156,
'p': 0.3899803536345776,
'r': 0.7605363984674329},
{'a': 0.7881653607133208,
'f1': 0.7731481481481481,
'p': 0.6816326530612244,
'r': 0.893048128342246},
{'a': 0.7628689087165408,
'f1': 0.46805234795996925,
'p': 0.3370288248337029,
'r': 0.7657430730478589},
{'a': 0.7885294117647059,
'f1': 0.7472759226713532,
'p': 0.6545566502463054,
'r': 0.8705978705978706}],
'www.foxnews.com;2015': [{'a': 0.8563569682151589,
'f1': 0.8865282472235635,
'p': 0.8652214891611687,
'r': 0.9089108910891089},
{'a': 0.8018691588785046,
'f1': 0.7188328912466844,
'p': 0.6878172588832487,
'r': 0.7527777777777778},
{'a': 0.8519888991674376,
'f1': 0.6444444444444445,
'p': 0.6223175965665236,
'r': 0.6682027649769585},
{'a': 0.7843719090009891,
'f1': 0.6812865497076024,
'p': 0.589873417721519,
'r': 0.8062283737024222},
{'a': 0.8609422492401215,
'f1': 0.8042780748663102,
'p': 0.7752577319587629,
'r': 0.8355555555555556},
{'a': 0.8248520710059172,
'f1': 0.672566371681416,
'p': 0.5984251968503937,
'r': 0.7676767676767676},
{'a': 0.826963906581741,
'f1': 0.7433070866141732,
'p': 0.6685552407932012,
'r': 0.8368794326241135},
{'a': 0.8463476070528967,
'f1': 0.7328467153284672,
'p': 0.7011173184357542,
'r': 0.7675840978593272},
{'a': 0.8742857142857143,
'f1': 0.8811524609843938,
'p': 0.8706998813760379,
'r': 0.8918590522478737},
{'a': 0.8710010319917441,
'f1': 0.5954692556634303,
'p': 0.5227272727272727,
'r': 0.6917293233082706},
{'a': 0.8536170212765958,
'f1': 0.8093126385809313,
'p': 0.8039647577092511,
'r': 0.8147321428571429},
{'a': 0.8363201911589009,
'f1': 0.766609880749574,
'p': 0.7009345794392523,
'r': 0.8458646616541353},
{'a': 0.8519900497512438,
'f1': 0.8344923504867872,
'p': 0.8075370121130552,
'r': 0.8633093525179856},
{'a': 0.8399339933993399,
'f1': 0.8283185840707965,
'p': 0.7878787878787878,
'r': 0.8731343283582089},
{'a': 0.8649334178820546,
'f1': 0.8924785461887934,
'p': 0.8700787401574803,
'r': 0.9160621761658031},
{'a': 0.8697394789579158,
'f1': 0.8959167333867094,
'p': 0.8952,
'r': 0.8966346153846154},
{'a': 0.7922077922077922,
'f1': 0.6363636363636364,
'p': 0.5645161290322581,
'r': 0.7291666666666666},
{'a': 0.8699234844025897,
'f1': 0.9002257336343116,
'p': 0.8815207780725022,
'r': 0.9197416974169742},
{'a': 0.853763440860215,
'f1': 0.84012539184953,
'p': 0.8271604938271605,
'r': 0.8535031847133758},
{'a': 0.8404864091559371,
'f1': 0.8631062001227747,
'p': 0.8155452436194895,
'r': 0.9165580182529335},
{'a': 0.8048780487804879,
'f1': 0.8079999999999998,
'p': 0.7917133258678611,
'r': 0.8249708284714119},
{'a': 0.8483572030328559,
'f1': 0.8369565217391305,
'p': 0.8048780487804879,
'r': 0.8716981132075472},
{'a': 0.8672086720867209,
'f1': 0.8122605363984674,
'p': 0.8153846153846154,
'r': 0.8091603053435115},
{'a': 0.8849701573521432,
'f1': 0.8921668362156663,
'p': 0.8921668362156663,
'r': 0.8921668362156663},
{'a': 0.7781094527363184,
'f1': 0.6836879432624113,
'p': 0.5863746958637469,
'r': 0.8197278911564626},
{'a': 0.8762641284949435,
'f1': 0.8725490196078431,
'p': 0.8651275820170109,
'r': 0.8800988875154512}],
'www.latimes.com;2000': [{'a': 0.8859138533178114,
'f1': 0.9079812206572769,
'p': 0.8719567177637512,
'r': 0.9471106758080313},
{'a': 0.9060402684563759,
'f1': 0.9296754250386399,
'p': 0.9011235955056179,
'r': 0.960095770151636},
{'a': 0.8181818181818182,
'f1': 0.7275541795665633,
'p': 0.6167979002624672,
'r': 0.8867924528301887},
{'a': 0.8636176349402555,
'f1': 0.8996665656259473,
'p': 0.8436611711199545,
'r': 0.9636363636363636},
{'a': 0.9081803005008348,
'f1': 0.9397590361445783,
'p': 0.9081287044877223,
'r': 0.9736722650930549},
{'a': 0.8790149892933619,
'f1': 0.9011373578302712,
'p': 0.865546218487395,
'r': 0.9397810218978102},
{'a': 0.8671875,
'f1': 0.8919262555626193,
'p': 0.8565323565323565,
'r': 0.9303713527851459},
{'a': 0.8974439886399496,
'f1': 0.8914132976946207,
'p': 0.8567758509955041,
'r': 0.9289693593314763},
{'a': 0.906754772393539,
'f1': 0.9151069518716578,
'p': 0.8912760416666666,
'r': 0.9402472527472527},
{'a': 0.864039408866995,
'f1': 0.8217054263565892,
'p': 0.7718446601941747,
'r': 0.8784530386740331},
{'a': 0.95949263502455,
'f1': 0.9440993788819876,
'p': 0.9675925925925926,
'r': 0.9217199558985667},
{'a': 0.8910433979686058,
'f1': 0.9293695131683959,
'p': 0.8885921404044258,
'r': 0.9740694270179842},
{'a': 0.8336025848142165,
'f1': 0.851227732306211,
'p': 0.7864768683274022,
'r': 0.9275970619097587},
{'a': 0.8811685748124753,
'f1': 0.9156153630501823,
'p': 0.8850948509485095,
'r': 0.9483159117305459},
{'a': 0.879980563654033,
'f1': 0.9096892138939672,
'p': 0.8723702664796634,
'r': 0.9503437738731857},
{'a': 0.8948170731707317,
'f1': 0.9210827296988181,
'p': 0.884981684981685,
'r': 0.9602543720190779},
{'a': 0.8715647784632642,
'f1': 0.9174477289113193,
'p': 0.8724717175179979,
'r': 0.9673128088179399},
{'a': 0.8863366336633663,
'f1': 0.9081011847582452,
'p': 0.8807453416149068,
'r': 0.9372108393919365},
{'a': 0.8540250447227191,
'f1': 0.9017341040462428,
'p': 0.8482102401449932,
'r': 0.9624678663239075},
{'a': 0.8820047355958959,
'f1': 0.9161290322580645,
'p': 0.8850948509485095,
'r': 0.9494186046511628},
{'a': 0.875943000838223,
'f1': 0.9118522930315663,
'p': 0.870380898237635,
'r': 0.957473420888055},
{'a': 0.8548465660009742,
'f1': 0.8876319758672699,
'p': 0.8306280875088214,
'r': 0.9530364372469635},
{'a': 0.867056856187291,
'f1': 0.8451801363193768,
'p': 0.8097014925373134,
'r': 0.8839103869653768},
{'a': 0.8775137111517367,
'f1': 0.9103678929765886,
'p': 0.8635786802030457,
'r': 0.9625176803394625},
{'a': 0.9000886786875554,
'f1': 0.9353481254781943,
'p': 0.9015486725663717,
'r': 0.9717806041335453}],
'www.latimes.com;2005': [{'a': 0.7422196124486201,
'f1': 0.7298461538461539,
'p': 0.6161038961038962,
'r': 0.8950943396226415},
{'a': 0.6051838456901748,
'f1': 0.4910644910644911,
'p': 0.3484013230429989,
'r': 0.8315789473684211},
{'a': 0.7498252969951084,
'f1': 0.7250384024577572,
'p': 0.6059050064184852,
'r': 0.9024856596558317},
{'a': 0.7478488589599701,
'f1': 0.7092320966350302,
'p': 0.5796897038081805,
'r': 0.9133333333333333},
{'a': 0.779495990836197,
'f1': 0.7187728268809349,
'p': 0.5992691839220463,
'r': 0.8978102189781022},
{'a': 0.9032732622287606,
'f1': 0.927488282326992,
'p': 0.9241758241758242,
'r': 0.9308245711123408},
{'a': 0.8685015290519877,
'f1': 0.7754569190600522,
'p': 0.7156626506024096,
'r': 0.8461538461538461},
{'a': 0.7058096415327565,
'f1': 0.7202194357366772,
'p': 0.6038107752956636,
'r': 0.8922330097087379},
{'a': 0.7935819601040763,
'f1': 0.8212318477716575,
'p': 0.7263064658990257,
'r': 0.9447004608294931},
{'a': 0.7500845451471085,
'f1': 0.8024592354985296,
'p': 0.7117117117117117,
'r': 0.9197303921568627},
{'a': 0.7272727272727273,
'f1': 0.7387698686938493,
'p': 0.619351100811124,
'r': 0.9152397260273972},
{'a': 0.8605957446808511,
'f1': 0.9013134112543679,
'p': 0.8558352402745996,
'r': 0.9518961567828964},
{'a': 0.7504501260352899,
'f1': 0.7099204688154039,
'p': 0.5812200137080192,
'r': 0.9118279569892473},
{'a': 0.7527058051820269,
'f1': 0.7320540156361051,
'p': 0.6311274509803921,
'r': 0.871404399323181},
{'a': 0.7738570113531759,
'f1': 0.7661059980958426,
'p': 0.6675884955752213,
'r': 0.8987341772151899},
{'a': 0.7750533049040512,
'f1': 0.7287917737789202,
'p': 0.6169749727965179,
'r': 0.8901098901098901},
{'a': 0.8182175107970161,
'f1': 0.84127528282482,
'p': 0.7649625935162094,
'r': 0.9345011424219345},
{'a': 0.7622868605817452,
'f1': 0.7208480565371025,
'p': 0.6169354838709677,
'r': 0.8668555240793201},
{'a': 0.6897179253867152,
'f1': 0.6960784313725491,
'p': 0.5606604450825556,
'r': 0.917743830787309},
{'a': 0.6222222222222222,
'f1': 0.3751178133836004,
'p': 0.24968632371392724,
'r': 0.7537878787878788},
{'a': 0.6855524079320113,
'f1': 0.6961678832116787,
'p': 0.5593841642228738,
'r': 0.9214975845410628},
{'a': 0.8013661202185792,
'f1': 0.851844304055431,
'p': 0.7766629505759941,
'r': 0.9431407942238267},
{'a': 0.9086802194256212,
'f1': 0.9246739419749801,
'p': 0.9234449760765551,
'r': 0.92590618336887},
{'a': 0.732839313572543,
'f1': 0.7519014849692142,
'p': 0.6306196840826246,
'r': 0.9309417040358744},
{'a': 0.6179577464788732,
'f1': 0.354806739345887,
'p': 0.2315653298835705,
'r': 0.7584745762711864}],
'www.latimes.com;2010': [{'a': 0.5850843444806155,
'f1': 0.49166062364031904,
'p': 0.3570300157977883,
'r': 0.789289871944121},
{'a': 0.49572649572649574,
'f1': 0.41124886604172967,
'p': 0.27287319422150885,
'r': 0.8343558282208589},
{'a': 0.6794190577399929,
'f1': 0.3728343728343728,
'p': 0.2642436149312377,
'r': 0.6329411764705882},
{'a': 0.7071513002364066,
'f1': 0.47704485488126647,
'p': 0.3772954924874791,
'r': 0.648493543758967},
{'a': 0.8660617059891107,
'f1': 0.8762990278243379,
'p': 0.8378205128205128,
'r': 0.9184820801124385},
{'a': 0.8609794628751974,
'f1': 0.8613081166272655,
'p': 0.813849590469099,
'r': 0.9146443514644351},
{'a': 0.5482108713466266,
'f1': 0.4580602883355177,
'p': 0.3210840606338999,
'r': 0.7988571428571428},
{'a': 0.7263668192835981,
'f1': 0.49653121902874137,
'p': 0.3691967575534267,
'r': 0.7579425113464447},
{'a': 0.8486257928118394,
'f1': 0.8380090497737557,
'p': 0.7860780984719864,
'r': 0.8972868217054264},
{'a': 0.8374751491053678,
'f1': 0.7872478854912167,
'p': 0.7092614302461899,
'r': 0.8845029239766082},
{'a': 0.5851926977687627,
'f1': 0.1452455590386625,
'p': 0.08128654970760234,
'r': 0.6813725490196079},
{'a': 0.8460222412318221,
'f1': 0.8331788693234478,
'p': 0.779705117085863,
'r': 0.8945273631840795},
{'a': 0.7058642922935217,
'f1': 0.5801928133216476,
'p': 0.4652143359100492,
'r': 0.770663562281723},
{'a': 0.574838388861263,
'f1': 0.5037724898432966,
'p': 0.36717428087986465,
'r': 0.8022181146025879},
{'a': 0.63409915356711,
'f1': 0.41013645224171547,
'p': 0.2743870631194575,
'r': 0.8117283950617284},
{'a': 0.5813497619714366,
'f1': 0.27532719340765877,
'p': 0.1658878504672897,
'r': 0.8091168091168092},
{'a': 0.7512280701754386,
'f1': 0.5599006828057107,
'p': 0.41566820276497696,
'r': 0.8574144486692015},
{'a': 0.5737658674188999,
'f1': 0.5066927848514529,
'p': 0.3653483992467043,
'r': 0.8264110756123536},
{'a': 0.44818136522172397,
'f1': 0.33860853986264555,
'p': 0.21283783783783783,
'r': 0.8277372262773722},
{'a': 0.5393258426966292,
'f1': 0.43914415994387934,
'p': 0.30023980815347723,
'r': 0.8172323759791122},
{'a': 0.8528493364558938,
'f1': 0.8547206165703276,
'p': 0.811265544989027,
'r': 0.9030944625407166},
{'a': 0.5451306413301663,
'f1': 0.44046749452154854,
'p': 0.3015,
'r': 0.8170731707317073},
{'a': 0.6951649055395454,
'f1': 0.5310344827586208,
'p': 0.41945525291828795,
'r': 0.723489932885906},
{'a': 0.8481414324569356,
'f1': 0.8239621650026274,
'p': 0.7574879227053141,
'r': 0.9032258064516129},
{'a': 0.7052851597491788,
'f1': 0.2595648912228057,
'p': 0.1610800744878957,
'r': 0.667953667953668}],
'www.latimes.com;2015': [{'a': 0.5938778389053463,
'f1': 0.1715107913669065,
'p': 0.09600515463917526,
'r': 0.8032345013477089},
{'a': 0.63568345323741,
'f1': 0.18637532133676094,
'p': 0.10681399631675875,
'r': 0.7304785894206549},
{'a': 0.5944452121044632,
'f1': 0.19522895530573073,
'p': 0.11069651741293532,
'r': 0.8259860788863109},
{'a': 0.311409056412851,
'f1': 0.35004775549188155,
'p': 0.21673565937315198,
'r': 0.9094292803970223},
{'a': 0.23751617076326004,
'f1': 0.21643180005317736,
'p': 0.12378345498783455,
'r': 0.8604651162790697},
{'a': 0.5991861648016277,
'f1': 0.13758599124452783,
'p': 0.07498295841854125,
'r': 0.8333333333333334},
{'a': 0.3306508875739645,
'f1': 0.3897280966767372,
'p': 0.24814509480626545,
'r': 0.907537688442211},
{'a': 0.603215251102575,
'f1': 0.1875910282551704,
'p': 0.10595590654820665,
'r': 0.817258883248731},
{'a': 0.5912418842381545,
'f1': 0.19832023841777296,
'p': 0.11244239631336406,
'r': 0.8394495412844036},
{'a': 0.6033519553072626,
'f1': 0.292358803986711,
'p': 0.1753487048107031,
'r': 0.8787446504992867},
{'a': 0.580749718151071,
'f1': 0.12164157071154416,
'p': 0.06592,
'r': 0.7862595419847328},
{'a': 0.5854936959909336,
'f1': 0.13534278959810875,
'p': 0.07360977177756349,
'r': 0.8388278388278388},
{'a': 0.23728315201411349,
'f1': 0.19089207735495947,
'p': 0.10778443113772455,
'r': 0.8337874659400545},
{'a': 0.5976490582070528,
'f1': 0.17484751670055182,
'p': 0.09798177083333333,
'r': 0.8113207547169812},
{'a': 0.601409666283084,
'f1': 0.15851806863042817,
'p': 0.08805668016194332,
'r': 0.7933130699088146},
{'a': 0.5995661605206074,
'f1': 0.1482620732082436,
'p': 0.08166723144696712,
'r': 0.8033333333333333},
{'a': 0.6116892373485389,
'f1': 0.18491921005385994,
'p': 0.10492359932088285,
'r': 0.7783375314861462},
{'a': 0.59974993053626,
'f1': 0.20786362386582347,
'p': 0.11954459203036052,
'r': 0.7957894736842105},
{'a': 0.5972012621758814,
'f1': 0.22818086225026288,
'p': 0.13183475091130012,
'r': 0.84765625},
{'a': 0.2567389875082183,
'f1': 0.19565990750622553,
'p': 0.11079774375503626,
'r': 0.8358662613981763},
{'a': 0.6091758708581139,
'f1': 0.22384701912260968,
'p': 0.1285529715762274,
'r': 0.8652173913043478},
{'a': 0.608569161597461,
'f1': 0.31417979610750696,
'p': 0.19093213179386087,
'r': 0.8862745098039215},
{'a': 0.27805978567399886,
'f1': 0.26857142857142857,
'p': 0.15921409214092141,
'r': 0.8576642335766423},
{'a': 0.2214304565848509,
'f1': 0.1873278236914601,
'p': 0.10559006211180125,
'r': 0.8292682926829268},
{'a': 0.23664980326025858,
'f1': 0.20631209818819407,
'p': 0.11719787516600266,
'r': 0.8609756097560975}],
'www.nymag.com;2000': [{'a': 0.9425414364640884,
'f1': 0.9440860215053763,
'p': 0.9251844046364595,
'r': 0.9637760702524698},
{'a': 0.9427288040426727,
'f1': 0.9430803571428572,
'p': 0.9224890829694323,
'r': 0.9646118721461188},
{'a': 0.9402366863905326,
'f1': 0.9463051568314725,
'p': 0.9368421052631579,
'r': 0.9559613319011815},
{'a': 0.9270248596631917,
'f1': 0.9103448275862068,
'p': 0.8733459357277883,
'r': 0.9506172839506173},
{'a': 0.9404255319148936,
'f1': 0.851851851851852,
'p': 0.8341968911917098,
'r': 0.8702702702702703},
{'a': 0.9549382716049383,
'f1': 0.9581181870338497,
'p': 0.9619815668202765,
'r': 0.9542857142857143},
{'a': 0.9580137262817925,
'f1': 0.9697146185206756,
'p': 0.9714119019836639,
'r': 0.9680232558139535},
{'a': 0.9394673123486683,
'f1': 0.9440089585666294,
'p': 0.9366666666666666,
'r': 0.9514672686230248},
{'a': 0.9344746162927982,
'f1': 0.9410515135422199,
'p': 0.9267782426778243,
'r': 0.9557713052858684},
{'a': 0.5105755041810133,
'f1': 0.4271732872769142,
'p': 0.28277439024390244,
'r': 0.8729411764705882},
{'a': 0.9386454183266932,
'f1': 0.9216683621566633,
'p': 0.9114688128772636,
'r': 0.9320987654320988},
{'a': 0.627173213135866,
'f1': 0.5021496130696474,
'p': 0.34803337306317045,
'r': 0.9012345679012346},
{'a': 0.9426644182124789,
'f1': 0.9506292352371732,
'p': 0.9370229007633588,
'r': 0.9646365422396856},
{'a': 0.9452054794520548,
'f1': 0.9559902200488998,
'p': 0.9630541871921182,
'r': 0.9490291262135923},
{'a': 0.9378813089295619,
'f1': 0.946360153256705,
'p': 0.9285714285714286,
'r': 0.96484375},
{'a': 0.9535490605427975,
'f1': 0.9616213885295387,
'p': 0.948936170212766,
'r': 0.9746503496503497},
{'a': 0.6271008403361344,
'f1': 0.5862470862470862,
'p': 0.4280851063829787,
'r': 0.9297597042513863},
{'a': 0.9449612403100776,
'f1': 0.9086229086229086,
'p': 0.9145077720207254,
'r': 0.9028132992327366},
{'a': 0.9414389291689905,
'f1': 0.9415041782729805,
'p': 0.9378468368479467,
'r': 0.9451901565995525},
{'a': 0.9498181818181818,
'f1': 0.9332042594385286,
'p': 0.9323017408123792,
'r': 0.9341085271317829},
{'a': 0.9301221166892809,
'f1': 0.9286209286209286,
'p': 0.8993288590604027,
'r': 0.9598853868194842},
{'a': 0.9354838709677419,
'f1': 0.932415519399249,
'p': 0.9085365853658537,
'r': 0.9575835475578406},
{'a': 0.9311145510835913,
'f1': 0.9187214611872146,
'p': 0.8902654867256637,
'r': 0.9490566037735849},
{'a': 0.9412225705329154,
'f1': 0.9006622516556291,
'p': 0.9139784946236559,
'r': 0.8877284595300261},
{'a': 0.9436519258202568,
'f1': 0.9399239543726237,
'p': 0.9507692307692308,
'r': 0.9293233082706767}],
'www.nymag.com;2005': [{'a': 0.7711069418386491,
'f1': 0.7621832358674464,
'p': 0.6729776247848537,
'r': 0.8786516853932584},
{'a': 0.7747963584091998,
'f1': 0.7693817468105986,
'p': 0.6782006920415224,
'r': 0.8888888888888888},
{'a': 0.7776712985146143,
'f1': 0.7600827300930713,
'p': 0.6693989071038251,
'r': 0.8791866028708134},
{'a': 0.7718120805369127,
'f1': 0.7603036876355748,
'p': 0.6594543744120414,
'r': 0.8975672215108835},
{'a': 0.772093023255814,
'f1': 0.7591480065537956,
'p': 0.6575212866603595,
'r': 0.8979328165374677},
{'a': 0.6690590111642744,
'f1': 0.5300113250283126,
'p': 0.3848684210526316,
'r': 0.850909090909091},
{'a': 0.6889952153110048,
'f1': 0.6470131885182312,
'p': 0.4970202622169249,
'r': 0.9266666666666666},
{'a': 0.7529296875,
'f1': 0.7548449612403101,
'p': 0.6629787234042553,
'r': 0.876265466816648},
{'a': 0.7890173410404624,
'f1': 0.7859237536656892,
'p': 0.7030430220356768,
'r': 0.8909574468085106},
{'a': 0.8201140487299119,
'f1': 0.8394261915779732,
'p': 0.7699490662139219,
'r': 0.9226856561546287},
{'a': 0.7540029112081513,
'f1': 0.7907552620718118,
'p': 0.6794326241134752,
'r': 0.945705824284304},
{'a': 0.7821091505949939,
'f1': 0.8270921523933572,
'p': 0.7345286292654714,
'r': 0.9463487332339792},
{'a': 0.7581395348837209,
'f1': 0.6584564860426929,
'p': 0.5976154992548435,
'r': 0.7330895795246801},
{'a': 0.6695778748180495,
'f1': 0.6828132277596646,
'p': 0.5461997019374069,
'r': 0.9105590062111801},
{'a': 0.697495183044316,
'f1': 0.7186379928315413,
'p': 0.5905743740795287,
'r': 0.9176201372997712},
{'a': 0.7282120395327942,
'f1': 0.7632093933463796,
'p': 0.6482712765957447,
'r': 0.9276879162702188},
{'a': 0.8176943699731903,
'f1': 0.8482142857142857,
'p': 0.7840440165061898,
'r': 0.9238249594813615},
{'a': 0.7697462900909526,
'f1': 0.7593796898449224,
'p': 0.6704946996466431,
'r': 0.8754325259515571},
{'a': 0.7224770642201835,
'f1': 0.7547628698824482,
'p': 0.6363636363636364,
'r': 0.9272908366533864},
{'a': 0.7748896517900932,
'f1': 0.7610619469026549,
'p': 0.6688014638609332,
'r': 0.8828502415458938},
{'a': 0.7994902293967715,
'f1': 0.814026792750197,
'p': 0.7405017921146954,
'r': 0.9037620297462817},
{'a': 0.7799607072691552,
'f1': 0.7812499999999999,
'p': 0.6884681583476764,
'r': 0.9029345372460497},
{'a': 0.6740623349181194,
'f1': 0.6716338477913784,
'p': 0.531592249368155,
'r': 0.911849710982659},
{'a': 0.7434108527131783,
'f1': 0.602641056422569,
'p': 0.47992351816443596,
'r': 0.8096774193548387},
{'a': 0.6736401673640168,
'f1': 0.6729559748427673,
'p': 0.5358931552587646,
'r': 0.9042253521126761},
{'a': 0.768056968463886,
'f1': 0.7894736842105263,
'p': 0.6979591836734694,
'r': 0.9086078639744952}],
'www.nymag.com;2010': [{'a': 0.48481943112815595,
'f1': 0.0627906976744186,
'p': 0.037241379310344824,
'r': 0.2},
{'a': 0.44631901840490795,
'f1': 0.33419402434526,
'p': 0.20798898071625344,
'r': 0.849906191369606},
{'a': 0.3217094017094017,
'f1': 0.036893203883495145,
'p': 0.01954732510288066,
'r': 0.3275862068965517},
{'a': 0.311042524005487,
'f1': 0.02899951667472209,
'p': 0.015511892450879007,
'r': 0.2222222222222222},
{'a': 0.4183240952070427,
'f1': 0.22904062229904926,
'p': 0.1347914547304171,
'r': 0.7614942528735632},
{'a': 0.3187355943365163,
'f1': 0.1510053344275749,
'p': 0.08329560887279312,
'r': 0.8070175438596491},
{'a': 0.41282778171509565,
'f1': 0.043854587420657815,
'p': 0.023944549464398234,
'r': 0.2602739726027397},
{'a': 0.43229657555765,
'f1': 0.2786427145708583,
'p': 0.16941747572815535,
'r': 0.7842696629213484},
{'a': 0.3127237227465018,
'f1': 0.14146341463414633,
'p': 0.0777479892761394,
'r': 0.7837837837837838},
{'a': 0.30776762402088775,
'f1': 0.1360488798370672,
'p': 0.07435440783615316,
'r': 0.7990430622009569},
{'a': 0.5344157329064715,
'f1': 0.5479573712255773,
'p': 0.3932441045251753,
'r': 0.9033674963396779},
{'a': 0.3160771704180064,
'f1': 0.15829046299960425,
'p': 0.08798944126704795,
'r': 0.7874015748031497},
{'a': 0.3274732850741124,
'f1': 0.03938946331856228,
'p': 0.021197668256491786,
'r': 0.2777777777777778},
{'a': 0.309208290859667,
'f1': 0.031443544545021435,
'p': 0.016516516516516516,
'r': 0.32673267326732675},
{'a': 0.3213815789473684,
'f1': 0.16104107360715736,
'p': 0.0894713059195662,
'r': 0.8048780487804879},
{'a': 0.32691658223573117,
'f1': 0.15658061785865424,
'p': 0.08714083843617522,
'r': 0.7708333333333334},
{'a': 0.4134419551934827,
'f1': 0.187206020696143,
'p': 0.1067024128686327,
'r': 0.7624521072796935},
{'a': 0.4195666447800394,
'f1': 0.23130434782608694,
'p': 0.13516260162601626,
'r': 0.8012048192771084},
{'a': 0.5790219702338767,
'f1': 0.5123152709359605,
'p': 0.3659824046920821,
'r': 0.853625170998632},
{'a': 0.48916909149692855,
'f1': 0.05952380952380952,
'p': 0.03333333333333333,
'r': 0.2777777777777778},
{'a': 0.3778471138845554,
'f1': 0.3018207282913165,
'p': 0.18340425531914895,
'r': 0.8517786561264822},
{'a': 0.3234536082474227,
'f1': 0.16930379746835442,
'p': 0.09460654288240496,
'r': 0.8045112781954887},
{'a': 0.4106593782029382,
'f1': 0.18207681365576103,
'p': 0.10339256865912763,
'r': 0.7619047619047619},
{'a': 0.40123034859876966,
'f1': 0.04782608695652174,
'p': 0.026112759643916916,
'r': 0.2838709677419355},
{'a': 0.5376782077393075,
'f1': 0.5942806076854334,
'p': 0.44127405441274054,
'r': 0.9097127222982216}],
'www.nymag.com;2015': [{'a': 0.12131556489201077,
'f1': 0.0027319011548491245,
'p': 0.0013710582076529975,
'r': 0.36666666666666664},
{'a': 0.12269175361243288,
'f1': 0.003965008797363269,
'p': 0.001994266483858906,
'r': 0.33613445378151263},
{'a': 0.12294602844710008,
'f1': 0.02455937590291823,
'p': 0.01246913278403951,
'r': 0.8082408874801902},
{'a': 0.16712910070181167,
'f1': 0.05854498493327594,
'p': 0.030300136859861868,
'r': 0.8631006346328196},
{'a': 0.25470154326426825,
'f1': 0.25441998690374257,
'p': 0.14750459948018574,
'r': 0.9245835621453414},
{'a': 0.17664334917498797,
'f1': 0.0838928168260947,
'p': 0.04405891163255117,
'r': 0.8748451053283767},
{'a': 0.12178293724674187,
'f1': 0.0027855845996965704,
'p': 0.0013984966161376521,
'r': 0.34146341463414637},
{'a': 0.12372770769899956,
'f1': 0.005528816487720596,
'p': 0.0027873870734924466,
'r': 0.33532934131736525},
{'a': 0.12206695969734742,
'f1': 0.003672912271994044,
'p': 0.0018477826608070316,
'r': 0.29959514170040485},
{'a': 0.12218016322779686,
'f1': 0.0030812812166090995,
'p': 0.0015481035731229244,
'r': 0.31958762886597936},
{'a': 0.12040199256052729,
'f1': 0.01787622656174099,
'p': 0.009036884239477433,
'r': 0.8177777777777778},
{'a': 0.15947986577181208,
'f1': 0.004504355314145663,
'p': 0.002267724938304542,
'r': 0.3285024154589372},
{'a': 0.12283227537464826,
'f1': 0.004061818902318208,
'p': 0.002044123145955378,
'r': 0.31417624521072796},
{'a': 0.131848751352114,
'f1': 0.02022239324858682,
'p': 0.010238357563217155,
'r': 0.8141025641025641},
{'a': 0.294921875,
'f1': 0.34880605811648296,
'p': 0.21353597200962168,
'r': 0.9516616314199395},
{'a': 0.12296532587559265,
'f1': 0.004612294492523619,
'p': 0.0023225033089428865,
'r': 0.3274647887323944},
{'a': 0.12218213621952553,
'f1': 0.003573999156139088,
'p': 0.001795914294978923,
'r': 0.36},
{'a': 0.627518315018315,
'f1': 0.7415409054805402,
'p': 0.6018566271273853,
'r': 0.965659908978072},
{'a': 0.12394710426395496,
'f1': 0.005351568306823249,
'p': 0.002696965913347484,
'r': 0.34069400630914826},
{'a': 0.12461045612046985,
'f1': 0.0060868489422244215,
'p': 0.003071928071928072,
'r': 0.328},
{'a': 0.16345886410413307,
'f1': 0.009158293526601878,
'p': 0.004635187408296652,
'r': 0.3787465940054496},
{'a': 0.1570371188687583,
'f1': 0.030863021527910078,
'p': 0.01572306696861857,
'r': 0.8321917808219178},
{'a': 0.12216138391396349,
'f1': 0.003220133263976617,
'p': 0.001618002140741294,
'r': 0.3282828282828283},
{'a': 0.12024096385542168,
'f1': 0.0014917580368464236,
'p': 0.0007477380922708806,
'r': 0.3},
{'a': 0.12138728323699421,
'f1': 0.0031796502384737677,
'p': 0.001597444089456869,
'r': 0.3333333333333333},
{'a': 0.1540937213883417,
'f1': 0.02350831118086026,
'p': 0.011924876527164023,
'r': 0.8210290827740492}]}]
>>> trimmed_results[0][0]
22: 'c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter'
>>> for basepath,trimmed in trimmed_results:
... print(basepath)
... for key,val in trimmed.items():
... print(key.split(";"))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
['news.yahoo.com', '2000']
['www.cnn.com', '2005']
['news.yahoo.com', '2005']
['www.cnn.com', '2000']
['www.esquire.com', '2010']
['www.nymag.com', '2015']
['www.latimes.com', '2005']
['www.latimes.com', '2000']
['www.esquire.com', '2000']
['www.foxnews.com', '2010']
['www.foxnews.com', '2015']
['www.forbes.com', '2000']
['news.bbc.co.uk', '2015']
['www.forbes.com', '2005']
['news.bbc.co.uk', '2010']
['entertainment.msn.com', '2000']
['entertainment.msn.com', '2005']
['thenation.com', '2015']
['thenation.com', '2010']
['news.yahoo.com', '2010']
['news.yahoo.com', '2015']
['www.cnn.com', '2010']
['www.cnn.com', '2015']
['www.latimes.com', '2015']
['www.latimes.com', '2010']
['www.foxnews.com', '2000']
['www.foxnews.com', '2005']
['www.nymag.com', '2000']
['news.bbc.co.uk', '2005']
['www.forbes.com', '2015']
['news.bbc.co.uk', '2000']
['www.nymag.com', '2010']
['www.forbes.com', '2010']
['thenation.com', '2005']
['entertainment.msn.com', '2010']
['entertainment.msn.com', '2015']
['www.esquire.com', '2015']
['thenation.com', '2000']
['www.nymag.com', '2005']
['www.esquire.com', '2005']
[About 368 more lines. Double-click to unfold]
>>> for basepath,trimmed in trimmed_results:
... print(basepath)
... for key,val in trimmed.items():
... print(key.split(";"))
... print("precision avg", sum([d['p'] for d in val]))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
['news.yahoo.com', '2000']
('precision avg', 21.709997561299886)
['www.cnn.com', '2005']
('precision avg', 15.52934672215807)
['news.yahoo.com', '2005']
('precision avg', 16.210148453452835)
['www.cnn.com', '2000']
('precision avg', 17.644964417699235)
['www.esquire.com', '2010']
('precision avg', 8.767268143539825)
['www.nymag.com', '2015']
('precision avg', 1.1303931912114173)
['www.latimes.com', '2005']
('precision avg', 15.717055766151395)
['www.latimes.com', '2000']
('precision avg', 21.43111160202948)
['www.esquire.com', '2000']
('precision avg', 22.09608233296812)
['www.foxnews.com', '2010']
('precision avg', 10.92415757445724)
['www.foxnews.com', '2015']
('precision avg', 19.611994030452504)
['www.forbes.com', '2000']
('precision avg', 18.838012726977052)
['news.bbc.co.uk', '2015']
('precision avg', 3.327060367297357)
['www.forbes.com', '2005']
('precision avg', 17.721852191134012)
['news.bbc.co.uk', '2010']
('precision avg', 10.43553171039551)
['entertainment.msn.com', '2000']
('precision avg', 21.160243014971154)
['entertainment.msn.com', '2005']
('precision avg', 11.925953857526848)
['thenation.com', '2015']
('precision avg', 12.554858076203239)
['thenation.com', '2010']
('precision avg', 12.289997286496956)
['news.yahoo.com', '2010']
('precision avg', 14.466883800928802)
['news.yahoo.com', '2015']
('precision avg', 0.45965000933753014)
['www.cnn.com', '2010']
('precision avg', 10.801352513419403)
['www.cnn.com', '2015']
('precision avg', 7.572812533391358)
['www.latimes.com', '2015']
('precision avg', 3.054517547790862)
['www.latimes.com', '2010']
('precision avg', 10.987271007906777)
['www.foxnews.com', '2000']
('precision avg', 21.883586569457165)
['www.foxnews.com', '2005']
('precision avg', 8.469026456627942)
['www.nymag.com', '2000']
('precision avg', 21.384378173469827)
['news.bbc.co.uk', '2005']
('precision avg', 16.915538631207994)
['www.forbes.com', '2015']
('precision avg', 8.712023890809313)
['news.bbc.co.uk', '2000']
('precision avg', 22.298132941176863)
['www.nymag.com', '2010']
('precision avg', 3.0293718718467906)
['www.forbes.com', '2010']
('precision avg', 16.511125370586573)
['thenation.com', '2005']
('precision avg', 19.766157423921204)
['entertainment.msn.com', '2010']
('precision avg', 10.151968469114125)
['entertainment.msn.com', '2015']
('precision avg', 1.850292176197399)
['www.esquire.com', '2015']
('precision avg', 1.7337883883468581)
['thenation.com', '2000']
('precision avg', 21.994771059096344)
['www.nymag.com', '2005']
('precision avg', 16.58607635847708)
['www.esquire.com', '2005']
('precision avg', 23.161147551388474)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
['news.yahoo.com', '2000']
('precision avg', 17.126235735582178)
['www.cnn.com', '2005']
('precision avg', 15.101911647275438)
['news.yahoo.com', '2005']
('precision avg', 13.844072989718052)
['www.cnn.com', '2000']
('precision avg', 16.9197390185037)
['www.esquire.com', '2010']
('precision avg', 8.329224332643067)
['www.nymag.com', '2015']
('precision avg', 1.172522261001684)
['www.latimes.com', '2005']
('precision avg', 15.209720526648315)
['www.latimes.com', '2000']
('precision avg', 23.125038705898813)
['www.esquire.com', '2000']
('precision avg', 21.03283825886228)
['www.foxnews.com', '2010']
('precision avg', 7.54908698360258)
['www.foxnews.com', '2015']
('precision avg', 15.110458841196717)
['www.forbes.com', '2000']
('precision avg', 18.29711491762898)
['news.bbc.co.uk', '2015']
('precision avg', 2.9100006776039313)
['www.forbes.com', '2005']
('precision avg', 8.838932252727698)
['news.bbc.co.uk', '2010']
('precision avg', 7.6030770753735775)
['entertainment.msn.com', '2000']
('precision avg', 19.46702250471951)
['entertainment.msn.com', '2005']
('precision avg', 11.303490169992148)
['thenation.com', '2015']
('precision avg', 11.715045712954899)
['thenation.com', '2010']
('precision avg', 10.783858985772518)
['news.yahoo.com', '2010']
('precision avg', 7.993750891559503)
['news.yahoo.com', '2015']
('precision avg', 0.4584042706281651)
['www.cnn.com', '2010']
('precision avg', 8.302471349654782)
['www.cnn.com', '2015']
('precision avg', 7.485211626252682)
['www.latimes.com', '2015']
('precision avg', 3.068218829276209)
['www.latimes.com', '2010']
('precision avg', 10.043916521470862)
['www.foxnews.com', '2000']
('precision avg', 19.993871853185343)
['www.foxnews.com', '2005']
('precision avg', 13.594306125230965)
['www.nymag.com', '2000']
('precision avg', 20.89325818344202)
['news.bbc.co.uk', '2005']
('precision avg', 12.943798953630223)
['www.forbes.com', '2015']
('precision avg', 6.97962879269541)
['news.bbc.co.uk', '2000']
('precision avg', 14.322502792957817)
['www.nymag.com', '2010']
('precision avg', 3.0176390523135947)
['www.forbes.com', '2010']
('precision avg', 13.654932840329407)
['thenation.com', '2005']
('precision avg', 18.26452648502696)
['entertainment.msn.com', '2010']
('precision avg', 8.509829625989736)
['entertainment.msn.com', '2015']
('precision avg', 1.7518551353353289)
['www.esquire.com', '2015']
('precision avg', 1.7224527532515401)
['thenation.com', '2000']
('precision avg', 21.68363869637837)
['www.nymag.com', '2005']
('precision avg', 14.658075937844664)
['www.esquire.com', '2005']
('precision avg', 23.026489051324038)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
['news.yahoo.com', '2000']
('precision avg', 17.714671995450797)
['www.cnn.com', '2005']
('precision avg', 13.630577436176257)
['news.yahoo.com', '2005']
('precision avg', 14.301343792757224)
['www.cnn.com', '2000']
('precision avg', 14.991688591599273)
['www.esquire.com', '2010']
('precision avg', 8.875552733713096)
['www.nymag.com', '2015']
('precision avg', 1.1454170008577322)
['www.latimes.com', '2005']
('precision avg', 14.965163705313335)
['www.latimes.com', '2000']
('precision avg', 20.277554814523395)
['www.esquire.com', '2000']
('precision avg', 21.514972549854498)
['www.foxnews.com', '2010']
('precision avg', 8.306597819644416)
['www.foxnews.com', '2015']
('precision avg', 14.320794935494748)
['www.forbes.com', '2000']
('precision avg', 17.909881620402356)
['news.bbc.co.uk', '2015']
('precision avg', 2.979956722384392)
['www.forbes.com', '2005']
('precision avg', 11.637716789746719)
['news.bbc.co.uk', '2010']
('precision avg', 7.7915518138570015)
['entertainment.msn.com', '2000']
('precision avg', 16.75028111523998)
['entertainment.msn.com', '2005']
('precision avg', 11.30682587858099)
['thenation.com', '2015']
('precision avg', 11.402303244796256)
['thenation.com', '2010']
('precision avg', 11.290917045046191)
['news.yahoo.com', '2010']
('precision avg', 9.605606501013865)
['news.yahoo.com', '2015']
('precision avg', 0.4552537845463904)
['www.cnn.com', '2010']
('precision avg', 8.99434189474086)
['www.cnn.com', '2015']
('precision avg', 7.235467230998295)
['www.latimes.com', '2015']
('precision avg', 2.9042368956372533)
['www.latimes.com', '2010']
('precision avg', 10.505899838593834)
['www.foxnews.com', '2000']
('precision avg', 20.096146235869746)
['www.foxnews.com', '2005']
('precision avg', 8.145349942286117)
['www.nymag.com', '2000']
('precision avg', 20.926904381736882)
['news.bbc.co.uk', '2005']
('precision avg', 14.961186606903754)
['www.forbes.com', '2015']
('precision avg', 7.393417314421486)
['news.bbc.co.uk', '2000']
('precision avg', 15.50872124562447)
['www.nymag.com', '2010']
('precision avg', 3.1542209503400658)
['www.forbes.com', '2010']
('precision avg', 15.91589594765821)
['thenation.com', '2005']
('precision avg', 19.69741469700814)
['entertainment.msn.com', '2010']
('precision avg', 7.9325724598666465)
['entertainment.msn.com', '2015']
('precision avg', 1.783633506779156)
['www.esquire.com', '2015']
('precision avg', 1.7065600794677722)
['thenation.com', '2000']
('precision avg', 22.209244538427868)
['www.nymag.com', '2005']
('precision avg', 14.537872320726189)
['www.esquire.com', '2005']
('precision avg', 22.407415273114466)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
['news.yahoo.com', '2000']
('precision avg', 21.129292189595176)
['www.cnn.com', '2005']
('precision avg', 15.346048798581277)
['news.yahoo.com', '2005']
('precision avg', 16.512018243817487)
['www.cnn.com', '2000']
('precision avg', 20.365036433175625)
['www.esquire.com', '2010']
('precision avg', 10.270523102941436)
['www.nymag.com', '2015']
('precision avg', 1.1032680407515898)
['www.latimes.com', '2005']
('precision avg', 15.701247998181)
['www.latimes.com', '2000']
('precision avg', 21.520184129503193)
['www.esquire.com', '2000']
('precision avg', 22.396131059861343)
['www.foxnews.com', '2010']
('precision avg', 11.074416007430703)
['www.foxnews.com', '2015']
('precision avg', 19.703661092655647)
['www.forbes.com', '2000']
('precision avg', 18.750609649640506)
['news.bbc.co.uk', '2015']
('precision avg', 3.337096464208538)
['www.forbes.com', '2005']
('precision avg', 16.81477842476898)
['news.bbc.co.uk', '2010']
('precision avg', 10.394775284479424)
['entertainment.msn.com', '2000']
('precision avg', 20.76984417423298)
['entertainment.msn.com', '2005']
('precision avg', 11.840389152592579)
['thenation.com', '2015']
('precision avg', 12.61273825886752)
['thenation.com', '2010']
('precision avg', 14.283209856416644)
['news.yahoo.com', '2010']
('precision avg', 14.221217639277787)
['news.yahoo.com', '2015']
('precision avg', 0.4571785139488619)
['www.cnn.com', '2010']
('precision avg', 10.633805862858868)
['www.cnn.com', '2015']
('precision avg', 7.8572826027127585)
['www.latimes.com', '2015']
('precision avg', 3.340698252680636)
['www.latimes.com', '2010']
('precision avg', 11.93753593673547)
['www.foxnews.com', '2000']
('precision avg', 22.347884550073807)
['www.foxnews.com', '2005']
('precision avg', 8.22934908747002)
['www.nymag.com', '2000']
('precision avg', 21.952295830360786)
['news.bbc.co.uk', '2005']
('precision avg', 21.741916631512822)
['www.forbes.com', '2015']
('precision avg', 8.526047499294144)
['news.bbc.co.uk', '2000']
('precision avg', 23.173303929303515)
['www.nymag.com', '2010']
('precision avg', 4.822925498909523)
['www.forbes.com', '2010']
('precision avg', 16.75114593080278)
['thenation.com', '2005']
('precision avg', 22.511004691677286)
['entertainment.msn.com', '2010']
('precision avg', 11.537933897590998)
['entertainment.msn.com', '2015']
('precision avg', 1.9048505028607705)
['www.esquire.com', '2015']
('precision avg', 1.7388897369993817)
['thenation.com', '2000']
('precision avg', 23.16227421797609)
['www.nymag.com', '2005']
('precision avg', 16.822769773645135)
['www.esquire.com', '2005']
('precision avg', 23.21359804045775)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
['news.yahoo.com', '2000']
('precision avg', 10.655185320326193)
['www.cnn.com', '2005']
('precision avg', 3.366950617951386)
['news.yahoo.com', '2005']
('precision avg', 4.938284535749449)
['www.cnn.com', '2000']
('precision avg', 9.060084796571628)
['www.esquire.com', '2010']
('precision avg', 1.8288910099133502)
['www.nymag.com', '2015']
('precision avg', 0.20115260260859147)
['www.latimes.com', '2005']
('precision avg', 14.592041756358821)
['www.latimes.com', '2000']
('precision avg', 7.771736957144365)
['www.esquire.com', '2000']
('precision avg', 9.088303801617222)
['www.foxnews.com', '2010']
('precision avg', 1.2496379525824468)
['www.foxnews.com', '2015']
('precision avg', 9.765063427120118)
['www.forbes.com', '2000']
('precision avg', 5.477357670582989)
['news.bbc.co.uk', '2015']
('precision avg', 0.9436556843129248)
['www.forbes.com', '2005']
('precision avg', 7.771616341161128)
['news.bbc.co.uk', '2010']
('precision avg', 1.5058984339368582)
['entertainment.msn.com', '2000']
('precision avg', 10.253312294593188)
['entertainment.msn.com', '2005']
('precision avg', 3.618350045901846)
['thenation.com', '2015']
('precision avg', 1.4799302651057258)
['thenation.com', '2010']
('precision avg', 5.061943252053322)
['news.yahoo.com', '2010']
('precision avg', 4.482815158986356)
['news.yahoo.com', '2015']
('precision avg', 0.3405946479462301)
['www.cnn.com', '2010']
('precision avg', 3.5336586678064035)
['www.cnn.com', '2015']
('precision avg', 1.604598785955126)
['www.latimes.com', '2015']
('precision avg', 0.9655944155998898)
['www.latimes.com', '2010']
('precision avg', 3.141217320527754)
['www.foxnews.com', '2000']
('precision avg', 15.765137056977933)
['www.foxnews.com', '2005']
('precision avg', 2.165129958375628)
['www.nymag.com', '2000']
('precision avg', 16.277000477177072)
['news.bbc.co.uk', '2005']
('precision avg', 10.593390070835223)
['www.forbes.com', '2015']
('precision avg', 2.3114365071386693)
['news.bbc.co.uk', '2000']
('precision avg', 12.127975906599914)
['www.nymag.com', '2010']
('precision avg', 2.7490768553038736)
['www.forbes.com', '2010']
('precision avg', 6.736169820769055)
['thenation.com', '2005']
('precision avg', 9.118838023437627)
['entertainment.msn.com', '2010']
('precision avg', 3.2469595644486424)
['entertainment.msn.com', '2015']
('precision avg', 0.41640169622414175)
['www.esquire.com', '2015']
('precision avg', 0.7819505614454542)
['thenation.com', '2000']
('precision avg', 14.851433804152123)
['www.nymag.com', '2005']
('precision avg', 7.626650601546752)
['www.esquire.com', '2005']
('precision avg', 12.806130538259415)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
['news.yahoo.com', '2000']
('precision avg', 5.225462396467564)
['www.cnn.com', '2005']
('precision avg', 1.4996685053643646)
['news.yahoo.com', '2005']
('precision avg', 2.6918581521582072)
['www.cnn.com', '2000']
('precision avg', 6.437881323877801)
['www.esquire.com', '2010']
('precision avg', 1.1370166318698047)
['www.nymag.com', '2015']
('precision avg', 0.6853399068075471)
['www.latimes.com', '2005']
('precision avg', 1.3397963063529734)
['www.latimes.com', '2000']
('precision avg', 6.486319416447126)
['www.esquire.com', '2000']
('precision avg', 2.1274362734040158)
['www.foxnews.com', '2010']
('precision avg', 1.3953188528437581)
['www.foxnews.com', '2015']
('precision avg', 11.273317429941478)
['www.forbes.com', '2000']
('precision avg', 1.79277085936359)
['news.bbc.co.uk', '2015']
('precision avg', 0.8593592988211299)
['www.forbes.com', '2005']
('precision avg', 1.4638521329750946)
['news.bbc.co.uk', '2010']
('precision avg', 0.7700285592515149)
['entertainment.msn.com', '2000']
('precision avg', 3.0181373840863275)
['entertainment.msn.com', '2005']
('precision avg', 0.6124694753640268)
['thenation.com', '2015']
('precision avg', 1.1294811193941194)
['thenation.com', '2010']
('precision avg', 4.321585544179479)
['news.yahoo.com', '2010']
('precision avg', 3.332540803666999)
['news.yahoo.com', '2015']
('precision avg', 0.34020559334589373)
['www.cnn.com', '2010']
('precision avg', 2.618472226258032)
['www.cnn.com', '2015']
('precision avg', 1.7222971989040494)
['www.latimes.com', '2015']
('precision avg', 1.070974291564824)
['www.latimes.com', '2010']
('precision avg', 2.269106371464356)
['www.foxnews.com', '2000']
('precision avg', 4.973038619662631)
['www.foxnews.com', '2005']
('precision avg', 1.6347612687837918)
['www.nymag.com', '2000']
('precision avg', 17.742889708751807)
['news.bbc.co.uk', '2005']
('precision avg', 4.486427723924961)
['www.forbes.com', '2015']
('precision avg', 2.0758711177790863)
['news.bbc.co.uk', '2000']
('precision avg', 5.180022648443701)
['www.nymag.com', '2010']
('precision avg', 1.1005450117812918)
['www.forbes.com', '2010']
('precision avg', 3.112722512745058)
['thenation.com', '2005']
('precision avg', 9.205478593721235)
['entertainment.msn.com', '2010']
('precision avg', 1.2103999942123915)
['entertainment.msn.com', '2015']
('precision avg', 0.4774583796363766)
['www.esquire.com', '2015']
('precision avg', 0.7672942596011267)
['thenation.com', '2000']
('precision avg', 3.399022903358817)
['www.nymag.com', '2005']
('precision avg', 1.723316388104644)
['www.esquire.com', '2005']
('precision avg', 3.9736561261571834)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
['news.yahoo.com', '2000']
('precision avg', 20.218058086201616)
['www.cnn.com', '2005']
('precision avg', 15.61607911298001)
['news.yahoo.com', '2005']
('precision avg', 16.41069341290826)
['www.cnn.com', '2000']
('precision avg', 21.371187601447243)
['www.esquire.com', '2010']
('precision avg', 9.666124807846215)
['www.nymag.com', '2015']
('precision avg', 1.14582472305598)
['www.latimes.com', '2005']
('precision avg', 15.763814820838515)
['www.latimes.com', '2000']
('precision avg', 21.4660781568977)
['www.esquire.com', '2000']
('precision avg', 22.73737790975359)
['www.foxnews.com', '2010']
('precision avg', 10.76065488967149)
['www.foxnews.com', '2015']
('precision avg', 18.574081129362774)
['www.forbes.com', '2000']
('precision avg', 18.80098770966578)
['news.bbc.co.uk', '2015']
('precision avg', 3.291925531438696)
['www.forbes.com', '2005']
('precision avg', 15.86779765377574)
['news.bbc.co.uk', '2010']
('precision avg', 10.547122562734177)
['entertainment.msn.com', '2000']
('precision avg', 20.986740253998285)
['entertainment.msn.com', '2005']
('precision avg', 11.315465939460505)
['thenation.com', '2015']
('precision avg', 12.284476799833984)
['thenation.com', '2010']
('precision avg', 13.587405736901703)
['news.yahoo.com', '2010']
('precision avg', 13.138092774422951)
['news.yahoo.com', '2015']
('precision avg', 0.45518262145300653)
['www.cnn.com', '2010']
('precision avg', 10.274083663118974)
['www.cnn.com', '2015']
('precision avg', 7.488705021626607)
['www.latimes.com', '2015']
('precision avg', 3.2359064605129237)
['www.latimes.com', '2010']
('precision avg', 11.712892915672196)
['www.foxnews.com', '2000']
('precision avg', 21.825013022354483)
['www.foxnews.com', '2005']
('precision avg', 8.156045792756732)
['www.nymag.com', '2000']
('precision avg', 21.75932747287225)
['news.bbc.co.uk', '2005']
('precision avg', 22.258237635569678)
['www.forbes.com', '2015']
('precision avg', 8.118504978263733)
['news.bbc.co.uk', '2000']
('precision avg', 23.510656664182154)
['www.nymag.com', '2010']
('precision avg', 4.653152401049681)
['www.forbes.com', '2010']
('precision avg', 17.080475125452637)
['thenation.com', '2005']
('precision avg', 21.257602919379643)
['entertainment.msn.com', '2010']
('precision avg', 9.71478591767389)
['entertainment.msn.com', '2015']
('precision avg', 1.8057927948564494)
['www.esquire.com', '2015']
('precision avg', 1.7304427333294248)
['thenation.com', '2000']
('precision avg', 22.54515675526232)
['www.nymag.com', '2005']
('precision avg', 16.088391734838744)
['www.esquire.com', '2005']
('precision avg', 23.202934360321652)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
['news.yahoo.com', '2000']
('precision avg', 15.612739663230599)
['www.cnn.com', '2005']
('precision avg', 13.02949512125241)
['news.yahoo.com', '2005']
('precision avg', 11.853424593487647)
['www.cnn.com', '2000']
('precision avg', 19.09967598024882)
['www.esquire.com', '2010']
('precision avg', 7.542087095731353)
['www.nymag.com', '2015']
('precision avg', 0.5243847345505948)
['www.latimes.com', '2005']
('precision avg', 14.450624456392976)
['www.latimes.com', '2000']
('precision avg', 21.33416067347778)
['www.esquire.com', '2000']
('precision avg', 19.74696549715683)
['www.foxnews.com', '2010']
('precision avg', 7.407002336422208)
['www.foxnews.com', '2015']
('precision avg', 15.835018951954778)
['www.forbes.com', '2000']
('precision avg', 15.992915261455677)
['news.bbc.co.uk', '2015']
('precision avg', 3.091011492057163)
['www.forbes.com', '2005']
('precision avg', 9.744725892874227)
['news.bbc.co.uk', '2010']
('precision avg', 10.14873853266361)
['entertainment.msn.com', '2000']
('precision avg', 14.869596414949237)
['entertainment.msn.com', '2005']
('precision avg', 8.107769300730398)
['thenation.com', '2015']
('precision avg', 10.763310119846189)
['thenation.com', '2010']
('precision avg', 10.941559513491466)
['news.yahoo.com', '2010']
('precision avg', 11.028399022844255)
['news.yahoo.com', '2015']
('precision avg', 0.43174075354630903)
['www.cnn.com', '2010']
('precision avg', 10.091730224641404)
['www.cnn.com', '2015']
('precision avg', 6.9790648234194865)
['www.latimes.com', '2015']
('precision avg', 2.306687263743309)
['www.latimes.com', '2010']
('precision avg', 8.227712546142582)
['www.foxnews.com', '2000']
('precision avg', 18.808944435458827)
['www.foxnews.com', '2005']
('precision avg', 7.333776328426291)
['www.nymag.com', '2000']
('precision avg', 19.472651318789666)
['news.bbc.co.uk', '2005']
('precision avg', 11.953964611885027)
['www.forbes.com', '2015']
('precision avg', 6.815638721880534)
['news.bbc.co.uk', '2000']
('precision avg', 20.26747675428014)
['www.nymag.com', '2010']
('precision avg', 4.487179049829372)
['www.forbes.com', '2010']
('precision avg', 11.712640381904231)
['thenation.com', '2005']
('precision avg', 18.57458753361082)
['entertainment.msn.com', '2010']
('precision avg', 6.611170100517503)
['entertainment.msn.com', '2015']
('precision avg', 1.7446751624113788)
['www.esquire.com', '2015']
('precision avg', 1.6500271609475723)
['thenation.com', '2000']
('precision avg', 22.0446909978015)
['www.nymag.com', '2005']
('precision avg', 14.303680153538203)
['www.esquire.com', '2005']
('precision avg', 16.80870388885749)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
['news.yahoo.com', '2000']
('precision avg', 14.557824713547324)
['www.cnn.com', '2005']
('precision avg', 12.983641148907921)
['news.yahoo.com', '2005']
('precision avg', 14.769238114048926)
['www.cnn.com', '2000']
('precision avg', 16.288499661433047)
['www.esquire.com', '2010']
('precision avg', 9.278793492904477)
['www.nymag.com', '2015']
('precision avg', 1.136625085557761)
['www.latimes.com', '2005']
('precision avg', 14.8822298308542)
['www.latimes.com', '2000']
('precision avg', 19.347211008393003)
['www.esquire.com', '2000']
('precision avg', 19.111482012344716)
['www.foxnews.com', '2010']
('precision avg', 7.8219680106321015)
['www.foxnews.com', '2015']
('precision avg', 14.385610453364006)
['www.forbes.com', '2000']
('precision avg', 16.54216289548574)
['news.bbc.co.uk', '2015']
('precision avg', 3.1861385002270435)
['www.forbes.com', '2005']
('precision avg', 11.022986801008575)
['news.bbc.co.uk', '2010']
('precision avg', 8.755739166613798)
['entertainment.msn.com', '2000']
('precision avg', 17.567338780416893)
['entertainment.msn.com', '2005']
('precision avg', 11.030927001146262)
['thenation.com', '2015']
('precision avg', 12.369585949003147)
['thenation.com', '2010']
('precision avg', 11.659791100680401)
['news.yahoo.com', '2010']
('precision avg', 10.593925054985139)
['news.yahoo.com', '2015']
('precision avg', 0.4598833002115912)
['www.cnn.com', '2010']
('precision avg', 8.619594603750281)
['www.cnn.com', '2015']
('precision avg', 7.346863233524706)
['www.latimes.com', '2015']
('precision avg', 3.075767505808502)
['www.latimes.com', '2010']
('precision avg', 10.84870181250924)
['www.foxnews.com', '2000']
('precision avg', 19.60889352672145)
['www.foxnews.com', '2005']
('precision avg', 8.350480133373702)
['www.nymag.com', '2000']
('precision avg', 19.544141395312337)
['news.bbc.co.uk', '2005']
('precision avg', 15.530486996105186)
['www.forbes.com', '2015']
('precision avg', 7.505773153088242)
['news.bbc.co.uk', '2000']
('precision avg', 16.065922188140068)
['www.nymag.com', '2010']
('precision avg', 4.528534339042629)
['www.forbes.com', '2010']
('precision avg', 16.493749288125947)
['thenation.com', '2005']
('precision avg', 18.9988474341654)
['entertainment.msn.com', '2010']
('precision avg', 8.090031109345412)
['entertainment.msn.com', '2015']
('precision avg', 1.7435364305704588)
['www.esquire.com', '2015']
('precision avg', 1.7233253685695629)
['thenation.com', '2000']
('precision avg', 20.92481705018897)
['www.nymag.com', '2005']
('precision avg', 13.787744826205765)
['www.esquire.com', '2005']
('precision avg', 21.41145751052602)
>>> trimmed_results[0]
23: ['c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\\BodyTextExtractor2Filter',
{'entertainment.msn.com;2000': [{'a': 0.9472774416594641,
'f1': 0.8551068883610451,
'p': 0.8695652173913043,
'r': 0.8411214953271028},
{'a': 0.9224137931034483,
'f1': 0.8266199649737302,
'p': 0.8973384030418251,
'r': 0.7662337662337663},
{'a': 0.933852140077821,
'f1': 0.9098939929328622,
'p': 0.9501845018450185,
'r': 0.8728813559322034},
{'a': 0.8989405052974735,
'f1': 0.7847222222222223,
'p': 0.7174603174603175,
'r': 0.8659003831417624},
{'a': 0.927536231884058,
'f1': 0.8558758314855874,
'p': 0.8577777777777778,
'r': 0.8539823008849557},
{'a': 0.9419152276295133,
'f1': 0.87987012987013,
'p': 0.9093959731543624,
'r': 0.8522012578616353},
{'a': 0.9335180055401662,
'f1': 0.916083916083916,
'p': 0.9509981851179673,
'r': 0.8836424957841484},
{'a': 0.8910472972972973,
'f1': 0.7425149700598803,
'p': 0.6813186813186813,
'r': 0.8157894736842105},
{'a': 0.894695170229612,
'f1': 0.8129395218002814,
'p': 0.774798927613941,
'r': 0.8550295857988166},
{'a': 0.8803122289679098,
'f1': 0.676056338028169,
'p': 0.6075949367088608,
'r': 0.7619047619047619},
{'a': 0.9037656903765691,
'f1': 0.8804159445407279,
'p': 0.8581081081081081,
'r': 0.9039145907473309},
{'a': 0.9509632224168126,
'f1': 0.9087947882736157,
'p': 0.9,
'r': 0.9177631578947368},
{'a': 0.9333680374804789,
'f1': 0.9330543933054394,
'p': 0.970620239390642,
'r': 0.8982880161127895},
{'a': 0.9530469530469531,
'f1': 0.6356589147286822,
'p': 0.6029411764705882,
'r': 0.6721311475409836},
{'a': 0.9472981987991995,
'f1': 0.9090909090909091,
'p': 0.9360189573459715,
'r': 0.883668903803132},
{'a': 0.9328579916815211,
'f1': 0.8986547085201794,
'p': 0.9488636363636364,
'r': 0.8534923339011925},
{'a': 0.9454148471615721,
'f1': 0.8898678414096917,
'p': 0.9181818181818182,
'r': 0.8632478632478633},
{'a': 0.8812227074235808,
'f1': 0.7301587301587301,
'p': 0.6789667896678967,
'r': 0.7896995708154506},
{'a': 0.8959484346224678,
'f1': 0.7064935064935064,
'p': 0.6507177033492823,
'r': 0.7727272727272727},
{'a': 0.889644746787604,
'f1': 0.7972222222222222,
'p': 0.7572559366754618,
'r': 0.841642228739003},
{'a': 0.9303030303030303,
'f1': 0.8993435448577681,
'p': 0.9383561643835616,
'r': 0.8634453781512605},
{'a': 0.9379900213827512,
'f1': 0.9186155285313378,
'p': 0.9478764478764479,
'r': 0.8911070780399274},
{'a': 0.9497374343585896,
'f1': 0.9130998702983139,
'p': 0.9263157894736842,
'r': 0.9002557544757033},
{'a': 0.9401555954518253,
'f1': 0.9159663865546219,
'p': 0.9527972027972028,
'r': 0.8818770226537217},
{'a': 0.9470013947001394,
'f1': 0.9573273441886581,
'p': 0.9567901234567902,
'r': 0.9578651685393258}],
'entertainment.msn.com;2005': [{'a': 0.6906686260102866,
'f1': 0.6982078853046595,
'p': 0.5553021664766249,
'r': 0.9401544401544402},
{'a': 0.5468451242829828,
'f1': 0.21000000000000002,
'p': 0.12401574803149606,
'r': 0.6847826086956522},
{'a': 0.49862825788751713,
'f1': 0.23455497382198953,
'p': 0.14267515923566879,
'r': 0.6588235294117647},
{'a': 0.6598006644518273,
'f1': 0.580327868852459,
'p': 0.4270205066344994,
'r': 0.9053708439897699},
{'a': 0.6897289586305279,
'f1': 0.7030716723549487,
'p': 0.5640744797371303,
'r': 0.9329710144927537},
{'a': 0.6415343915343915,
'f1': 0.5893939393939394,
'p': 0.44457142857142856,
'r': 0.8741573033707866},
{'a': 0.6283185840707964,
'f1': 0.5948553054662379,
'p': 0.44417767106842737,
'r': 0.9002433090024331},
{'a': 0.7079343365253078,
'f1': 0.7292327203551046,
'p': 0.5958549222797928,
'r': 0.9395424836601307},
{'a': 0.6803051317614425,
'f1': 0.6069906223358909,
'p': 0.4587628865979381,
'r': 0.8967254408060453},
{'a': 0.8139684583199227,
'f1': 0.8720106288751107,
'p': 0.7942718838241226,
'r': 0.9666175748649976},
{'a': 0.7660757733750434,
'f1': 0.8402563493947306,
'p': 0.7378074197582326,
'r': 0.9757442116868799},
{'a': 0.6724023825281271,
'f1': 0.6583850931677019,
'p': 0.5107066381156317,
'r': 0.9262135922330097},
{'a': 0.6503153468815698,
'f1': 0.6453447050461976,
'p': 0.49780701754385964,
'r': 0.9171717171717172},
{'a': 0.7622252131000449,
'f1': 0.8039940828402367,
'p': 0.7003865979381443,
'r': 0.9435763888888888},
{'a': 0.5560640732265446,
'f1': 0.4203187250996016,
'p': 0.2779973649538867,
'r': 0.8612244897959184},
{'a': 0.6134852801519468,
'f1': 0.36899224806201547,
'p': 0.2389558232931727,
'r': 0.8095238095238095},
{'a': 0.6816109422492401,
'f1': 0.6823351023502654,
'p': 0.5357142857142857,
'r': 0.9394572025052192},
{'a': 0.5220125786163522,
'f1': 0.27790973871733965,
'p': 0.16931982633863965,
'r': 0.7748344370860927},
{'a': 0.6473509933774835,
'f1': 0.5988700564971752,
'p': 0.4428969359331476,
'r': 0.9244186046511628},
{'a': 0.662015503875969,
'f1': 0.6466774716369531,
'p': 0.4962686567164179,
'r': 0.9279069767441861},
{'a': 0.5902621722846442,
'f1': 0.3572267920094007,
'p': 0.22926093514328807,
'r': 0.8085106382978723},
{'a': 0.5920763022743947,
'f1': 0.540495867768595,
'p': 0.38698224852071006,
'r': 0.8958904109589041},
{'a': 0.6629581151832461,
'f1': 0.57190357439734,
'p': 0.4226044226044226,
'r': 0.884318766066838},
{'a': 0.7147041593438781,
'f1': 0.7240793201133144,
'p': 0.5889400921658986,
'r': 0.9397058823529412},
{'a': 0.6313432835820896,
'f1': 0.6085578446909667,
'p': 0.4544378698224852,
'r': 0.920863309352518},
{'a': 0.5412639405204461,
'f1': 0.25392986698911735,
'p': 0.15306122448979592,
'r': 0.7446808510638298},
{'a': 0.6824005394470668,
'f1': 0.6713189113747383,
'p': 0.5320796460176991,
'r': 0.9092627599243857}],
'entertainment.msn.com;2010': [{'a': 0.3428857047650783,
'f1': 0.1658206429780034,
'p': 0.09201877934272301,
'r': 0.8376068376068376},
{'a': 0.45119947420308903,
'f1': 0.42013888888888884,
'p': 0.2742520398912058,
'r': 0.8976261127596439},
{'a': 0.6411235342241615,
'f1': 0.5517711171662125,
'p': 0.4136874361593463,
'r': 0.8282208588957055},
{'a': 0.9039820166987798,
'f1': 0.8546426835196889,
'p': 0.8108856088560885,
'r': 0.9033915724563206},
{'a': 0.3921737260804128,
'f1': 0.34117921230482406,
'p': 0.20938215102974828,
'r': 0.9207547169811321},
{'a': 0.5535641547861507,
'f1': 0.3234567901234568,
'p': 0.19969512195121952,
'r': 0.8506493506493507},
{'a': 0.14695238095238095,
'f1': 0.10955363356198429,
'p': 0.058325394305070395,
'r': 0.9003267973856209},
{'a': 0.7608596250571559,
'f1': 0.6565988181221274,
'p': 0.5488474204171241,
'r': 0.8169934640522876},
{'a': 0.884125920964501,
'f1': 0.7920673076923077,
'p': 0.7289823008849557,
'r': 0.8671052631578947},
{'a': 0.896735273243435,
'f1': 0.8186915887850468,
'p': 0.7595375722543353,
'r': 0.8878378378378379},
{'a': 0.7920924033762772,
'f1': 0.7240566037735848,
'p': 0.6220871327254306,
'r': 0.8660084626234132},
{'a': 0.36601513240857503,
'f1': 0.25046589638464406,
'p': 0.14608695652173914,
'r': 0.8772845953002611},
{'a': 0.1811268579329416,
'f1': 0.1899469994870918,
'p': 0.10574909575480677,
'r': 0.9320469798657718},
{'a': 0.41000352236703064,
'f1': 0.32595573440643866,
'p': 0.20009881422924902,
'r': 0.8785249457700651},
{'a': 0.7624944714727997,
'f1': 0.6670799752014879,
'p': 0.5563598759048604,
'r': 0.8328173374613003},
{'a': 0.4393613754989254,
'f1': 0.42506297229219153,
'p': 0.27439024390243905,
'r': 0.9427374301675978},
{'a': 0.8097795364612775,
'f1': 0.837321730722746,
'p': 0.7646799116997792,
'r': 0.9252136752136753},
{'a': 0.13969136253931105,
'f1': 0.13577253691866875,
'p': 0.07321131447587355,
'r': 0.9333333333333333},
{'a': 0.6126237623762376,
'f1': 0.44503546099290786,
'p': 0.2977461447212337,
'r': 0.8807017543859649},
{'a': 0.7682926829268293,
'f1': 0.7115384615384617,
'p': 0.6161262050832603,
'r': 0.8419161676646707},
{'a': 0.6949648711943794,
'f1': 0.7540132200188858,
'p': 0.6240719030871434,
'r': 0.9522957662492546},
{'a': 0.4694069657985566,
'f1': 0.4663931839697065,
'p': 0.3124735729386892,
'r': 0.9191542288557214},
{'a': 0.789193302891933,
'f1': 0.7656514382402708,
'p': 0.6830188679245283,
'r': 0.8710298363811357},
{'a': 0.3921901528013582,
'f1': 0.32634408602150533,
'p': 0.19927774130006565,
'r': 0.900593471810089},
{'a': 0.7479967948717948,
'f1': 0.6831234256926951,
'p': 0.5809768637532133,
'r': 0.8288508557457213}],
'entertainment.msn.com;2015': [{'a': 0.5922783603431839,
'f1': 0.30057236304170076,
'p': 0.18274010737721216,
'r': 0.8462246777163904},
{'a': 0.5627964528768818,
'f1': 0.08620689655172414,
'p': 0.045599635202918376,
'r': 0.7874015748031497},
{'a': 0.5792236086353734,
'f1': 0.20978240654640132,
'p': 0.12028150991682661,
'r': 0.8197674418604651},
{'a': 0.5620437956204379,
'f1': 0.08229211546747094,
'p': 0.0435278030993619,
'r': 0.7519685039370079},
{'a': 0.5617125883437468,
'f1': 0.08782775527606056,
'p': 0.046627433227704844,
'r': 0.7545787545787546},
{'a': 0.5577628361858191,
'f1': 0.08821676118462508,
'p': 0.04678102027177545,
'r': 0.7720588235294118},
{'a': 0.49898887765419614,
'f1': 0.17462520821765687,
'p': 0.09741365959423881,
'r': 0.8420348058902276},
{'a': 0.5571502323180175,
'f1': 0.06700021753317381,
'p': 0.03503184713375796,
'r': 0.7661691542288557},
{'a': 0.5541204819277108,
'f1': 0.11345343043311613,
'p': 0.060980634528224144,
'r': 0.8131868131868132},
{'a': 0.5564376590330788,
'f1': 0.07825719120135363,
'p': 0.04120267260579064,
'r': 0.7773109243697479},
{'a': 0.4916342588405535,
'f1': 0.0852725793327909,
'p': 0.04492455418381344,
'r': 0.8370607028753994},
{'a': 0.5661891699685055,
'f1': 0.12139917695473253,
'p': 0.06573083778966132,
'r': 0.793010752688172},
{'a': 0.5645177312009536,
'f1': 0.16622289844047167,
'p': 0.09192259150189314,
'r': 0.8670634920634921},
{'a': 0.5425956576769545,
'f1': 0.11157337367624812,
'p': 0.059995932479153954,
'r': 0.7951482479784366},
{'a': 0.5201313937118723,
'f1': 0.0501579045142114,
'p': 0.025921658986175114,
'r': 0.7714285714285715},
{'a': 0.5346083788706739,
'f1': 0.19119974675530232,
'p': 0.10803076372741907,
'r': 0.8308115543328748},
{'a': 0.5627691984452148,
'f1': 0.05063868613138687,
'p': 0.02616690240452617,
'r': 0.7816901408450704},
{'a': 0.5659126365054602,
'f1': 0.159682899207248,
'p': 0.08790523690773068,
'r': 0.8703703703703703},
{'a': 0.5280909612077203,
'f1': 0.09525554130793186,
'p': 0.05061319836480436,
'r': 0.8074534161490683},
{'a': 0.5116564417177915,
'f1': 0.16935002981514607,
'p': 0.09424257507881201,
'r': 0.8340675477239354},
{'a': 0.5793736501079914,
'f1': 0.3330479452054795,
'p': 0.20717202201313686,
'r': 0.8487272727272728},
{'a': 0.5667684090041969,
'f1': 0.1768756795940558,
'p': 0.09878542510121457,
'r': 0.8442906574394463},
{'a': 0.5547138908793121,
'f1': 0.05884898312418867,
'p': 0.030665163472378805,
'r': 0.7272727272727273},
{'a': 0.5545847039473685,
'f1': 0.05824820691154097,
'p': 0.030330466274332276,
'r': 0.73224043715847},
{'a': 0.5695238095238095,
'f1': 0.1908342284282134,
'p': 0.10769852495453627,
'r': 0.8367346938775511}],
'news.bbc.co.uk;2000': [{'a': 0.9118311981914092,
'f1': 0.8494208494208494,
'p': 0.88,
'r': 0.8208955223880597},
{'a': 0.9057377049180327,
'f1': 0.8410138248847927,
'p': 0.8837772397094431,
'r': 0.8021978021978022},
{'a': 0.9044198895027624,
'f1': 0.9006318207926479,
'p': 0.9223529411764706,
'r': 0.8799102132435466},
{'a': 0.9266358228684732,
'f1': 0.9125295508274233,
'p': 0.9429967426710097,
'r': 0.8839694656488549},
{'a': 0.9117997616209773,
'f1': 0.9022457067371201,
'p': 0.918010752688172,
'r': 0.887012987012987},
{'a': 0.8712029161603888,
'f1': 0.825944170771757,
'p': 0.8152350081037277,
'r': 0.8369384359400999},
{'a': 0.9092284417549168,
'f1': 0.8529411764705883,
'p': 0.8969072164948454,
'r': 0.8130841121495327},
{'a': 0.9022482893450635,
'f1': 0.8511904761904762,
'p': 0.8746177370030581,
'r': 0.8289855072463768},
{'a': 0.8823114869626497,
'f1': 0.7930607187112763,
'p': 0.8142493638676844,
'r': 0.7729468599033816},
{'a': 0.9267782426778243,
'f1': 0.9076517150395779,
'p': 0.9502762430939227,
'r': 0.8686868686868687},
{'a': 0.9225251076040172,
'f1': 0.8689320388349514,
'p': 0.9132653061224489,
'r': 0.8287037037037037},
{'a': 0.8396866840731071,
'f1': 0.8188790560471977,
'p': 0.7797752808988764,
'r': 0.8621118012422361},
{'a': 0.9097978227060654,
'f1': 0.8284023668639052,
'p': 0.8588957055214724,
'r': 0.8},
{'a': 0.8773255813953489,
'f1': 0.8451944240645635,
'p': 0.8458149779735683,
'r': 0.844574780058651},
{'a': 0.9183238636363636,
'f1': 0.8762109795479011,
'p': 0.9187358916478555,
'r': 0.8374485596707819},
{'a': 0.9077791718946048,
'f1': 0.8746803069053709,
'p': 0.9210053859964094,
'r': 0.8327922077922078},
{'a': 0.9012187299550994,
'f1': 0.8555347091932457,
'p': 0.9101796407185628,
'r': 0.8070796460176991},
{'a': 0.8915866741953699,
'f1': 0.8748370273794003,
'p': 0.8946666666666667,
'r': 0.8558673469387755},
{'a': 0.9106292966684294,
'f1': 0.9038133181559476,
'p': 0.928654970760234,
'r': 0.8802660753880266},
{'a': 0.8931178310740354,
'f1': 0.8686739269698911,
'p': 0.9125168236877523,
'r': 0.8288508557457213},
{'a': 0.8817005545286506,
'f1': 0.8502340093603743,
'p': 0.8596214511041009,
'r': 0.8410493827160493},
{'a': 0.9145496535796767,
'f1': 0.8948863636363636,
'p': 0.9402985074626866,
'r': 0.8536585365853658},
{'a': 0.9274905422446406,
'f1': 0.9117421335379894,
'p': 0.9565217391304348,
'r': 0.8709677419354839},
{'a': 0.8952116585704372,
'f1': 0.8422152560083594,
'p': 0.8448637316561844,
'r': 0.8395833333333333},
{'a': 0.9093789607097592,
'f1': 0.8686868686868686,
'p': 0.9148936170212766,
'r': 0.8269230769230769}],
'news.bbc.co.uk;2005': [{'a': 0.8391019644527596,
'f1': 0.7152317880794702,
'p': 0.6189111747851003,
'r': 0.8470588235294118},
{'a': 0.8142076502732241,
'f1': 0.7186761229314422,
'p': 0.6166328600405679,
'r': 0.8611898016997167},
{'a': 0.7044967880085653,
'f1': 0.7008670520231215,
'p': 0.5652680652680653,
'r': 0.9220532319391636},
{'a': 0.8440366972477065,
'f1': 0.8042226487523992,
'p': 0.7325174825174825,
'r': 0.8914893617021277},
{'a': 0.8418952618453865,
'f1': 0.8483978957436633,
'p': 0.8063636363636364,
'r': 0.8950554994954592},
{'a': 0.8062460165710643,
'f1': 0.8033635187580855,
'p': 0.711340206185567,
'r': 0.9227340267459139},
{'a': 0.9006181645268664,
'f1': 0.914238818219122,
'p': 0.9034874290348743,
'r': 0.925249169435216},
{'a': 0.82328190743338,
'f1': 0.7700729927007299,
'p': 0.6963696369636964,
'r': 0.8612244897959184},
{'a': 0.8155339805825242,
'f1': 0.7031250000000001,
'p': 0.6094808126410836,
'r': 0.8307692307692308},
{'a': 0.836343732895457,
'f1': 0.8280621046578494,
'p': 0.7903402854006586,
'r': 0.8695652173913043},
{'a': 0.8426966292134831,
'f1': 0.8038528896672504,
'p': 0.765,
'r': 0.8468634686346863},
{'a': 0.8250950570342205,
'f1': 0.7415730337078651,
'p': 0.6626506024096386,
'r': 0.8418367346938775},
{'a': 0.8407202216066482,
'f1': 0.789762340036563,
'p': 0.7728085867620751,
'r': 0.8074766355140187},
{'a': 0.8260200153964589,
'f1': 0.755939524838013,
'p': 0.6809338521400778,
'r': 0.8495145631067961},
{'a': 0.7973811164713991,
'f1': 0.6931106471816284,
'p': 0.5981981981981982,
'r': 0.8238213399503722},
{'a': 0.8313349320543565,
'f1': 0.7365792759051186,
'p': 0.6526548672566371,
'r': 0.8452722063037249},
{'a': 0.7385358004827032,
'f1': 0.6277205040091639,
'p': 0.4823943661971831,
'r': 0.898360655737705},
{'a': 0.7896613190730838,
'f1': 0.7941860465116279,
'p': 0.6905965621840243,
'r': 0.9343365253077975},
{'a': 0.6781193490054249,
'f1': 0.5180505415162455,
'p': 0.38523489932885907,
'r': 0.790633608815427},
{'a': 0.794679005205321,
'f1': 0.8104644954618259,
'p': 0.711340206185567,
'r': 0.9416873449131513},
{'a': 0.8725854383358098,
'f1': 0.8961550105964273,
'p': 0.8716136631330977,
'r': 0.9221183800623053},
{'a': 0.7995110024449877,
'f1': 0.638235294117647,
'p': 0.5331695331695332,
'r': 0.7948717948717948},
{'a': 0.8473439917483239,
'f1': 0.8435517970401691,
'p': 0.8093306288032455,
'r': 0.8807947019867549},
{'a': 0.709572742022715,
'f1': 0.6546623794212219,
'p': 0.5146612740141557,
'r': 0.8992932862190812},
{'a': 0.8297376093294461,
'f1': 0.8027027027027026,
'p': 0.7342398022249691,
'r': 0.8852459016393442}],
'news.bbc.co.uk;2010': [{'a': 0.7625243981782693,
'f1': 0.7058823529411765,
'p': 0.6311239193083573,
'r': 0.8007312614259597},
{'a': 0.7582283624542869,
'f1': 0.5697758496023138,
'p': 0.4586728754365541,
'r': 0.7519083969465649},
{'a': 0.7143962848297214,
'f1': 0.45089285714285715,
'p': 0.33554817275747506,
'r': 0.6870748299319728},
{'a': 0.706855791962175,
'f1': 0.5267175572519084,
'p': 0.3942857142857143,
'r': 0.7931034482758621},
{'a': 0.7664473684210527,
'f1': 0.32746955345060896,
'p': 0.21530249110320285,
'r': 0.6836158192090396},
{'a': 0.7461730153079388,
'f1': 0.6067291781577496,
'p': 0.5018248175182481,
'r': 0.7670850767085077},
{'a': 0.6400894187779433,
'f1': 0.02424242424242424,
'p': 0.013729977116704805,
'r': 0.10344827586206896},
{'a': 0.7614255765199162,
'f1': 0.4878487848784878,
'p': 0.3702185792349727,
'r': 0.7150395778364116},
{'a': 0.7620689655172413,
'f1': 0.463035019455253,
'p': 0.3380681818181818,
'r': 0.7345679012345679},
{'a': 0.7661676646706587,
'f1': 0.7279693486590039,
'p': 0.6626506024096386,
'r': 0.8075734157650696},
{'a': 0.7575757575757576,
'f1': 0.456,
'p': 0.33727810650887574,
'r': 0.7037037037037037},
{'a': 0.7598484848484849,
'f1': 0.6313953488372093,
'p': 0.5386904761904762,
'r': 0.7626404494382022},
{'a': 0.754180602006689,
'f1': 0.48148148148148145,
'p': 0.3684210526315789,
'r': 0.6946564885496184},
{'a': 0.7579535683576956,
'f1': 0.49141824751580854,
'p': 0.37006802721088433,
'r': 0.7311827956989247},
{'a': 0.7263339070567987,
'f1': 0.29646017699115046,
'p': 0.18457300275482094,
'r': 0.7528089887640449},
{'a': 0.7547416612164813,
'f1': 0.6553308823529411,
'p': 0.5882838283828383,
'r': 0.7396265560165975},
{'a': 0.7636441770519983,
'f1': 0.5454545454545453,
'p': 0.4263565891472868,
'r': 0.7568807339449541},
{'a': 0.6528982992016661,
'f1': 0.4959677419354839,
'p': 0.3614988978692138,
'r': 0.7897271268057785},
{'a': 0.6826923076923077,
'f1': 0.5428424833247819,
'p': 0.41884402216943784,
'r': 0.7711370262390671},
{'a': 0.7665213015766521,
'f1': 0.7015437392795882,
'p': 0.621580547112462,
'r': 0.8051181102362205},
{'a': 0.7759719566602932,
'f1': 0.7356148928168484,
'p': 0.6694045174537988,
'r': 0.8163606010016694},
{'a': 0.7036881810561609,
'f1': 0.4532095901005414,
'p': 0.32447397563676633,
'r': 0.7512820512820513},
{'a': 0.7713230355943587,
'f1': 0.6939325842696629,
'p': 0.628152969894223,
'r': 0.7751004016064257},
{'a': 0.6798048048048048,
'f1': 0.5066512434933488,
'p': 0.3721325403568394,
'r': 0.7934782608695652},
{'a': 0.6916354556803995,
'f1': 0.43650190114068443,
'p': 0.30434782608695654,
'r': 0.771505376344086}],
'news.bbc.co.uk;2015': [{'a': 0.5555871077665767,
'f1': 0.23658536585365852,
'p': 0.14049826187717265,
'r': 0.7484567901234568},
{'a': 0.5497448979591837,
'f1': 0.20236003012804415,
'p': 0.11711711711711711,
'r': 0.7435424354243543},
{'a': 0.563236936825121,
'f1': 0.3763955342902711,
'p': 0.24402573529411764,
'r': 0.8226181254841208},
{'a': 0.5506756756756757,
'f1': 0.14048890137679126,
'p': 0.07836990595611286,
'r': 0.6775067750677507},
{'a': 0.5788987191337647,
'f1': 0.36838978015448604,
'p': 0.23779084633086167,
'r': 0.8172231985940246},
{'a': 0.5673118905545045,
'f1': 0.1927776269345642,
'p': 0.11216429699842022,
'r': 0.6853281853281853},
{'a': 0.5444834855938159,
'f1': 0.19995062947420392,
'p': 0.11571428571428571,
'r': 0.73502722323049},
{'a': 0.5694312474548663,
'f1': 0.2543488481429243,
'p': 0.15510321100917432,
'r': 0.706266318537859},
{'a': 0.5528025381477565,
'f1': 0.11217756448710257,
'p': 0.061131088591042826,
'r': 0.68},
{'a': 0.5627456909585727,
'f1': 0.1770062606715993,
'p': 0.10103963612735542,
'r': 0.713302752293578},
{'a': 0.48321324543921507,
'f1': 0.06335093081411503,
'p': 0.03328467153284671,
'r': 0.6551724137931034},
{'a': 0.5497154836777478,
'f1': 0.08183206106870229,
'p': 0.043762246897452645,
'r': 0.6291079812206573},
{'a': 0.558078141499472,
'f1': 0.2945638432364096,
'p': 0.1813700051894136,
'r': 0.7836322869955157},
{'a': 0.5500292568753657,
'f1': 0.15864332603938733,
'p': 0.08876645240281604,
'r': 0.7455012853470437},
{'a': 0.5471483430521974,
'f1': 0.1939240506329114,
'p': 0.1114019778941245,
'r': 0.748046875},
{'a': 0.5554911619820342,
'f1': 0.1520176893311222,
'p': 0.08550995024875623,
'r': 0.6840796019900498},
{'a': 0.5763792625450513,
'f1': 0.26503126503126506,
'p': 0.1626808385001476,
'r': 0.7146562905317769},
{'a': 0.5585113353426812,
'f1': 0.31202777210537064,
'p': 0.19435258204019334,
'r': 0.7908902691511387},
{'a': 0.5703114281794485,
'f1': 0.30182790905037893,
'p': 0.18701657458563536,
'r': 0.7817551963048499},
{'a': 0.5642479213907785,
'f1': 0.25210810810810813,
'p': 0.1531791907514451,
'r': 0.7118437118437119},
{'a': 0.5640256959314776,
'f1': 0.23726273726273728,
'p': 0.14061574896388396,
'r': 0.7587859424920128},
{'a': 0.5690190257725003,
'f1': 0.2325804901489668,
'p': 0.1399652978600347,
'r': 0.6875},
{'a': 0.5737860137968348,
'f1': 0.3251231527093596,
'p': 0.2042518837459634,
'r': 0.7964323189926548},
{'a': 0.5553221288515406,
'f1': 0.24530544330877105,
'p': 0.1462999716472923,
'r': 0.7588235294117647},
{'a': 0.5427863292460214,
'f1': 0.1616838077015068,
'p': 0.09164859002169197,
'r': 0.6855983772819473}],
'news.yahoo.com;2000': [{'a': 0.9225543478260869,
'f1': 0.9279393173198484,
'p': 0.9607329842931938,
'r': 0.8973105134474327},
{'a': 0.8682457438934122,
'f1': 0.8540983606557376,
'p': 0.837620578778135,
'r': 0.8712374581939799},
{'a': 0.9199632014719411,
'f1': 0.9085173501577286,
'p': 0.9230769230769231,
'r': 0.8944099378881988},
{'a': 0.8974093264248705,
'f1': 0.8626907073509015,
'p': 0.8405405405405405,
'r': 0.886039886039886},
{'a': 0.9158878504672897,
'f1': 0.8979591836734695,
'p': 0.9145496535796767,
'r': 0.8819599109131403},
{'a': 0.9133271202236719,
'f1': 0.8963210702341137,
'p': 0.919908466819222,
'r': 0.8739130434782608},
{'a': 0.8031042128603104,
'f1': 0.8314350797266515,
'p': 0.909468438538206,
'r': 0.7657342657342657},
{'a': 0.8618903754855416,
'f1': 0.8683127572016461,
'p': 0.8591205211726385,
'r': 0.8777038269550749},
{'a': 0.8721804511278195,
'f1': 0.859338061465721,
'p': 0.8644470868014269,
'r': 0.854289071680376},
{'a': 0.877562028047465,
'f1': 0.8540192926045016,
'p': 0.8634590377113134,
'r': 0.8447837150127226},
{'a': 0.8776978417266187,
'f1': 0.8478747203579418,
'p': 0.838495575221239,
'r': 0.8574660633484162},
{'a': 0.920123839009288,
'f1': 0.9315649867374005,
'p': 0.9430719656283566,
'r': 0.9203354297693921},
{'a': 0.8999055712936733,
'f1': 0.8582887700534759,
'p': 0.856,
'r': 0.8605898123324397},
{'a': 0.9074235807860263,
'f1': 0.8960784313725491,
'p': 0.8943248532289628,
'r': 0.8978388998035364},
{'a': 0.8967314069161535,
'f1': 0.9213564213564215,
'p': 0.8948843728100911,
'r': 0.9494423791821561},
{'a': 0.8570649208947081,
'f1': 0.844418052256532,
'p': 0.8535414165666266,
'r': 0.8354876615746181},
{'a': 0.9123867069486404,
'f1': 0.8642745709828393,
'p': 0.9111842105263158,
'r': 0.8219584569732937},
{'a': 0.8925714285714286,
'f1': 0.8164062500000001,
'p': 0.8038461538461539,
'r': 0.8293650793650794},
{'a': 0.9205298013245033,
'f1': 0.9025522041763342,
'p': 0.9131455399061033,
'r': 0.8922018348623854},
{'a': 0.8363959691760522,
'f1': 0.7730263157894737,
'p': 0.7617504051863857,
'r': 0.7846410684474123},
{'a': 0.8599656357388317,
'f1': 0.6433260393873085,
'p': 0.6099585062240664,
'r': 0.6805555555555556},
{'a': 0.90089358245329,
'f1': 0.8842504743833016,
'p': 0.8944337811900192,
'r': 0.874296435272045},
{'a': 0.8973544973544973,
'f1': 0.8422764227642278,
'p': 0.8248407643312102,
'r': 0.8604651162790697},
{'a': 0.9201773835920177,
'f1': 0.924791086350975,
'p': 0.924791086350975,
'r': 0.924791086350975},
{'a': 0.9150214592274678,
'f1': 0.9247148288973385,
'p': 0.8928046989720999,
'r': 0.9589905362776026}],
'news.yahoo.com;2005': [{'a': 0.826288899210404,
'f1': 0.8064182194616977,
'p': 0.7369914853358562,
'r': 0.8902857142857142},
{'a': 0.7051349920592906,
'f1': 0.6634441087613293,
'p': 0.5294117647058824,
'r': 0.8883495145631068},
{'a': 0.8285714285714286,
'f1': 0.827937095282146,
'p': 0.766923736075407,
'r': 0.8994974874371859},
{'a': 0.7835408022130014,
'f1': 0.7016205910390848,
'p': 0.5832012678288431,
'r': 0.8803827751196173},
{'a': 0.7626582278481012,
'f1': 0.5689655172413792,
'p': 0.42950108459869846,
'r': 0.8425531914893617},
{'a': 0.8167247386759582,
'f1': 0.7674624226348364,
'p': 0.6625954198473283,
'r': 0.9117647058823529},
{'a': 0.6875776397515528,
'f1': 0.5755274261603376,
'p': 0.4289308176100629,
'r': 0.8743589743589744},
{'a': 0.7848872638634978,
'f1': 0.7252918287937743,
'p': 0.6044098573281452,
'r': 0.9066147859922179},
{'a': 0.915273132664437,
'f1': 0.8756137479541735,
'p': 0.8784893267651889,
'r': 0.8727569331158238},
{'a': 0.7367066895368782,
'f1': 0.7725925925925926,
'p': 0.6530995616781465,
'r': 0.9456029011786038},
{'a': 0.7989203778677463,
'f1': 0.6179487179487179,
'p': 0.5205183585313174,
'r': 0.7602523659305994},
{'a': 0.6863844977052524,
'f1': 0.7047527604416708,
'p': 0.5707620528771384,
'r': 0.9209535759096612},
{'a': 0.9134049186006234,
'f1': 0.9062265566391597,
'p': 0.94375,
'r': 0.8715728715728716},
{'a': 0.8586535072259429,
'f1': 0.8875175315568024,
'p': 0.8401486988847584,
'r': 0.9405469678953626},
{'a': 0.8676420551207894,
'f1': 0.8979805927091529,
'p': 0.852165256346441,
'r': 0.9490022172949002},
{'a': 0.7374233128834355,
'f1': 0.7720170454545455,
'p': 0.6516786570743405,
'r': 0.9468641114982579},
{'a': 0.7749169435215947,
'f1': 0.47984644913627633,
'p': 0.36231884057971014,
'r': 0.7102272727272727},
{'a': 0.7755662319835278,
'f1': 0.6765578635014837,
'p': 0.5652892561983471,
'r': 0.8423645320197044},
{'a': 0.8507462686567164,
'f1': 0.8557692307692308,
'p': 0.7837573385518591,
'r': 0.9423529411764706},
{'a': 0.8466257668711656,
'f1': 0.8619957537154989,
'p': 0.79296875,
'r': 0.9441860465116279},
{'a': 0.6668734491315137,
'f1': 0.5095890410958904,
'p': 0.3661417322834646,
'r': 0.8378378378378378},
{'a': 0.8207322872087494,
'f1': 0.7851851851851852,
'p': 0.7298728813559322,
'r': 0.8495684340320592},
{'a': 0.8586556169429097,
'f1': 0.8662309368191722,
'p': 0.8174342105263158,
'r': 0.9212233549582948},
{'a': 0.6675409836065573,
'f1': 0.5053658536585366,
'p': 0.36022253129346316,
'r': 0.8464052287581699},
{'a': 0.8410117434507678,
'f1': 0.8462882096069869,
'p': 0.7795655671761866,
'r': 0.9255014326647565}],
'news.yahoo.com;2010': [{'a': 0.8125,
'f1': 0.7473982970671712,
'p': 0.6954225352112676,
'r': 0.8077709611451943},
{'a': 0.792352371732817,
'f1': 0.49230769230769234,
'p': 0.37749546279491836,
'r': 0.7074829931972789},
{'a': 0.7949526813880127,
'f1': 0.6627756160830091,
'p': 0.5774011299435028,
'r': 0.7777777777777778},
{'a': 0.7983315197678637,
'f1': 0.7148717948717948,
'p': 0.6606635071090048,
'r': 0.7787709497206704},
{'a': 0.8179453836150845,
'f1': 0.7824397824397824,
'p': 0.7437223042836041,
'r': 0.8254098360655737},
{'a': 0.801693404634581,
'f1': 0.6120313862249346,
'p': 0.5043103448275862,
'r': 0.7782705099778271},
{'a': 0.8084656084656084,
'f1': 0.7454289732770745,
'p': 0.6824034334763949,
'r': 0.8212809917355371},
{'a': 0.7891472868217054,
'f1': 0.6472114137483787,
'p': 0.558165548098434,
'r': 0.7700617283950617},
{'a': 0.8128453038674033,
'f1': 0.7665805340223945,
'p': 0.7212317666126418,
'r': 0.8180147058823529},
{'a': 0.7882805816937554,
'f1': 0.5607808340727595,
'p': 0.4520743919885551,
'r': 0.7383177570093458},
{'a': 0.7756706753006476,
'f1': 0.41495778045838355,
'p': 0.29965156794425085,
'r': 0.6745098039215687},
{'a': 0.7818003913894325,
'f1': 0.4677804295942721,
'p': 0.3391003460207612,
'r': 0.7538461538461538},
{'a': 0.8298582151793161,
'f1': 0.8300943920044419,
'p': 0.806799784133837,
'r': 0.8547741566609491},
{'a': 0.7997992975413949,
'f1': 0.4017991004497751,
'p': 0.2809224318658281,
'r': 0.7052631578947368},
{'a': 0.8048540505083634,
'f1': 0.7550432276657061,
'p': 0.7005347593582888,
'r': 0.81875},
{'a': 0.8002373417721519,
'f1': 0.6918852959121414,
'p': 0.6203501094091903,
'r': 0.7820689655172414},
{'a': 0.7970494417862839,
'f1': 0.6688353936239428,
'p': 0.5881006864988558,
'r': 0.77526395173454},
{'a': 0.7986111111111112,
'f1': 0.6393562545720556,
'p': 0.5588235294117647,
'r': 0.747008547008547},
{'a': 0.799609375,
'f1': 0.673871582962492,
'p': 0.6057142857142858,
'r': 0.7593123209169055},
{'a': 0.7792207792207793,
'f1': 0.6678507992895204,
'p': 0.5784615384615385,
'r': 0.7899159663865546},
{'a': 0.8131220051603391,
'f1': 0.7050610820244329,
'p': 0.637223974763407,
'r': 0.7890625},
{'a': 0.7875927174645988,
'f1': 0.7403132728771641,
'p': 0.6834094368340944,
'r': 0.8075539568345323},
{'a': 0.8000719165767709,
'f1': 0.728780487804878,
'p': 0.6772438803263826,
'r': 0.7888067581837381},
{'a': 0.8107739515854074,
'f1': 0.754750331418471,
'p': 0.7,
'r': 0.8187919463087249},
{'a': 0.8030973451327433,
'f1': 0.5250800426894343,
'p': 0.41765704584040747,
'r': 0.7068965517241379}],
'news.yahoo.com;2015': [{'a': 0.3048423700544117,
'f1': 0.035215543412264724,
'p': 0.018075574600701208,
'r': 0.6803519061583577},
{'a': 0.34525586353944565,
'f1': 0.025003968883949835,
'p': 0.012745296378717378,
'r': 0.6548856548856549},
{'a': 0.2560697667057073,
'f1': 0.01878796735068785,
'p': 0.009526228883525974,
'r': 0.6766917293233082},
{'a': 0.3435495898583147,
'f1': 0.04774095842498107,
'p': 0.024713480419606526,
'r': 0.6997885835095138},
{'a': 0.38345512460183623,
'f1': 0.02623921085080148,
'p': 0.013366162504396765,
'r': 0.7112299465240641},
{'a': 0.3304576046566016,
'f1': 0.06543344214726152,
'p': 0.03422760217053087,
'r': 0.7411668036154478},
{'a': 0.3483621870718645,
'f1': 0.052654450640979206,
'p': 0.027355508729680914,
'r': 0.7003853564547207},
{'a': 0.3494353008685673,
'f1': 0.056013927787449846,
'p': 0.029069767441860465,
'r': 0.7660455486542443},
{'a': 0.2613521237506237,
'f1': 0.03416572750459695,
'p': 0.017480678185570347,
'r': 0.7506516072980017},
{'a': 0.38774996063612027,
'f1': 0.028579994004197064,
'p': 0.014579934747145187,
'r': 0.7185929648241206},
{'a': 0.2916898903840539,
'f1': 0.008692099104788083,
'p': 0.004374976629398347,
'r': 0.6573033707865169},
{'a': 0.34022892717958775,
'f1': 0.036302448804238864,
'p': 0.01866166077738516,
'r': 0.6636125654450262},
{'a': 0.3620855236554792,
'f1': 0.04883747220861439,
'p': 0.02526020348497252,
'r': 0.7330316742081447},
{'a': 0.33174694993689524,
'f1': 0.04314596588983848,
'p': 0.022292250233426705,
'r': 0.6686114352392065},
{'a': 0.24957875777119284,
'f1': 0.029311187103077677,
'p': 0.014970059880239521,
'r': 0.6976744186046512},
{'a': 0.3363527076518773,
'f1': 0.022886309376800855,
'p': 0.011643979057591623,
'r': 0.6634844868735084},
{'a': 0.3411867364746946,
'f1': 0.03392680875955105,
'p': 0.01741349545898071,
'r': 0.6562942008486563},
{'a': 0.2571102978941962,
'f1': 0.013344418153524759,
'p': 0.00674612582710089,
'r': 0.6089494163424124},
{'a': 0.3310363836824697,
'f1': 0.03394246426632894,
'p': 0.017437537180249853,
'r': 0.6346414073071719},
{'a': 0.34860527514807876,
'f1': 0.04199240562876927,
'p': 0.02163157289149695,
'r': 0.714828897338403},
{'a': 0.33099696356275304,
'f1': 0.01856045139017781,
'p': 0.009425426029256523,
'r': 0.6024096385542169},
{'a': 0.2689490523443717,
'f1': 0.024784973026843165,
'p': 0.012613009922822492,
'r': 0.7087980173482032},
{'a': 0.34358827597720065,
'f1': 0.07051195461299474,
'p': 0.03700552956188856,
'r': 0.7457142857142857},
{'a': 0.34750822755054067,
'f1': 0.06406581919951444,
'p': 0.03352152434721242,
'r': 0.721336370539104},
{'a': 0.25777743020254945,
'f1': 0.010921672433198549,
'p': 0.005512423993772223,
'r': 0.5835189309576837}],
'thenation.com;2000': [{'a': 0.948016415868673,
'f1': 0.963035019455253,
'p': 0.9611650485436893,
'r': 0.9649122807017544},
{'a': 0.900839054157132,
'f1': 0.9214975845410629,
'p': 0.8965922444183314,
'r': 0.9478260869565217},
{'a': 0.8160337552742616,
'f1': 0.844950213371266,
'p': 0.75,
'r': 0.9674267100977199},
{'a': 0.9159420289855073,
'f1': 0.9504950495049507,
'p': 0.9200264375413086,
'r': 0.9830508474576272},
{'a': 0.7521212121212121,
'f1': 0.8069844266163284,
'p': 0.6939935064935064,
'r': 0.963923337091319},
{'a': 0.8325673013788575,
'f1': 0.8754274548119199,
'p': 0.7950310559006211,
'r': 0.9739130434782609},
{'a': 0.9291457286432161,
'f1': 0.9593190998268898,
'p': 0.9308510638297872,
'r': 0.9895833333333334},
{'a': 0.9315551082033215,
'f1': 0.9603960396039604,
'p': 0.9371980676328503,
'r': 0.9847715736040609},
{'a': 0.8867091711623345,
'f1': 0.925459825750242,
'p': 0.8749237339841367,
'r': 0.9821917808219178},
{'a': 0.9161966156325544,
'f1': 0.9312169312169312,
'p': 0.9130998702983139,
'r': 0.9500674763832658},
{'a': 0.9013710747456878,
'f1': 0.9372007885102787,
'p': 0.8946236559139785,
'r': 0.984033116499113},
{'a': 0.7989382879893829,
'f1': 0.8438948995363215,
'p': 0.7465815861440291,
'r': 0.9703791469194313},
{'a': 0.92643391521197,
'f1': 0.8747346072186836,
'p': 0.8841201716738197,
'r': 0.865546218487395},
{'a': 0.9326113116726835,
'f1': 0.8828451882845187,
'p': 0.8865546218487395,
'r': 0.8791666666666667},
{'a': 0.929305912596401,
'f1': 0.9499089253187614,
'p': 0.924645390070922,
'r': 0.9765917602996255},
{'a': 0.8337819650067295,
'f1': 0.8753154972236243,
'p': 0.7903372835004557,
'r': 0.9807692307692307},
{'a': 0.9232209737827716,
'f1': 0.9459815546772069,
'p': 0.9220890410958904,
'r': 0.9711451758340848},
{'a': 0.9671549045716822,
'f1': 0.9795353982300885,
'p': 0.9838888888888889,
'r': 0.9752202643171806},
{'a': 0.8517538054268696,
'f1': 0.8909444985394352,
'p': 0.8198924731182796,
'r': 0.9754797441364605},
{'a': 0.9136400322841001,
'f1': 0.9288090485695276,
'p': 0.9148099606815203,
'r': 0.9432432432432433},
{'a': 0.949293246578416,
'f1': 0.9712248535777948,
'p': 0.9556502129792032,
'r': 0.9873155578565881},
{'a': 0.9605055292259084,
'f1': 0.9787835926449787,
'p': 0.9651324965132496,
'r': 0.9928263988522238},
{'a': 0.9318840579710145,
'f1': 0.9566020313942751,
'p': 0.9316546762589928,
'r': 0.9829222011385199},
{'a': 0.847394540942928,
'f1': 0.8894878706199462,
'p': 0.8256880733944955,
'r': 0.9639727361246349},
{'a': 0.8889570552147239,
'f1': 0.9224174882126017,
'p': 0.8762214983713354,
'r': 0.9737556561085973}],
'thenation.com;2005': [{'a': 0.7414854329093147,
'f1': 0.8374613003095975,
'p': 0.7300944669365722,
'r': 0.9818511796733213},
{'a': 0.5137777777777778,
'f1': 0.6188153310104529,
'p': 0.4563206577595067,
'r': 0.961038961038961},
{'a': 0.7658473479948253,
'f1': 0.852725793327909,
'p': 0.7561327561327561,
'r': 0.9776119402985075},
{'a': 0.849610270518111,
'f1': 0.9101861993428259,
'p': 0.8483920367534457,
'r': 0.9816893089190786},
{'a': 0.5731292517006803,
'f1': 0.6714659685863875,
'p': 0.5202839756592292,
'r': 0.9464944649446494},
{'a': 0.9057798891528107,
'f1': 0.9227774172615184,
'p': 0.8876404494382022,
'r': 0.9608108108108108},
{'a': 0.7099871959026889,
'f1': 0.8026143790849674,
'p': 0.6842496285289748,
'r': 0.9704952581664911},
{'a': 0.8706038487060385,
'f1': 0.8898927159796725,
'p': 0.8668866886688669,
'r': 0.91415313225058},
{'a': 0.7269180754226268,
'f1': 0.8161120840630474,
'p': 0.7039274924471299,
'r': 0.9708333333333333},
{'a': 0.9004950495049505,
'f1': 0.9133247089262614,
'p': 0.9168831168831169,
'r': 0.9097938144329897},
{'a': 0.826677994902294,
'f1': 0.8794326241134752,
'p': 0.8275862068965517,
'r': 0.9382093316519546},
{'a': 0.6467889908256881,
'f1': 0.7636224098234843,
'p': 0.6269691241335854,
'r': 0.9764474975466143},
{'a': 0.8954685890834192,
'f1': 0.913946587537092,
'p': 0.9120135363790186,
'r': 0.9158878504672897},
{'a': 0.7067342505430847,
'f1': 0.8055688910225637,
'p': 0.6894001643385373,
'r': 0.9688221709006929},
{'a': 0.7941558441558442,
'f1': 0.8731492597038816,
'p': 0.7905797101449276,
'r': 0.9749776586237712},
{'a': 0.9431714023831348,
'f1': 0.9671610169491526,
'p': 0.9620653319283456,
'r': 0.972310969116081},
{'a': 0.8959881129271917,
'f1': 0.9042407660738714,
'p': 0.8789893617021277,
'r': 0.9309859154929577},
{'a': 0.9060481503229595,
'f1': 0.9272727272727272,
'p': 0.9082813891362422,
'r': 0.947075208913649},
{'a': 0.8342046303211351,
'f1': 0.8497970230040597,
'p': 0.7733990147783252,
'r': 0.9429429429429429},
{'a': 0.8912901113294041,
'f1': 0.9011904761904763,
'p': 0.8822843822843823,
'r': 0.9209245742092458},
{'a': 0.842873831775701,
'f1': 0.8663686040735221,
'p': 0.8126747437092264,
'r': 0.9276595744680851},
{'a': 0.905373831775701,
'f1': 0.9209756097560975,
'p': 0.9129593810444874,
'r': 0.9291338582677166},
{'a': 0.865615141955836,
'f1': 0.8735905044510386,
'p': 0.8382687927107062,
'r': 0.9120198265179678},
{'a': 0.8798283261802575,
'f1': 0.89937106918239,
'p': 0.8674176776429809,
'r': 0.933768656716418},
{'a': 0.7283018867924528,
'f1': 0.8226600985221675,
'p': 0.712457337883959,
'r': 0.9731934731934732}],
'thenation.com;2010': [{'a': 0.675764192139738,
'f1': 0.6285178236397749,
'p': 0.489766081871345,
'r': 0.8769633507853403},
{'a': 0.6671180931744312,
'f1': 0.6212634822804315,
'p': 0.4818355640535373,
'r': 0.8742411101474414},
{'a': 0.6322725012431626,
'f1': 0.586756077116513,
'p': 0.44043624161073824,
'r': 0.8786610878661087},
{'a': 0.7086073777523592,
'f1': 0.647282796815507,
'p': 0.5114879649890591,
'r': 0.88124410933082},
{'a': 0.7182883341823739,
'f1': 0.7089473684210525,
'p': 0.5836221837088388,
'r': 0.9028150134048257},
{'a': 0.8147023086269745,
'f1': 0.45045045045045046,
'p': 0.3246753246753247,
'r': 0.7352941176470589},
{'a': 0.657844387755102,
'f1': 0.49695264885138307,
'p': 0.3559435862995299,
'r': 0.8229813664596274},
{'a': 0.4285228624851266,
'f1': 0.40663607483233327,
'p': 0.2612244897959184,
'r': 0.9171974522292994},
{'a': 0.8819702602230484,
'f1': 0.9008973858759267,
'p': 0.8726379440665155,
'r': 0.9310483870967742},
{'a': 0.8352638352638353,
'f1': 0.787551867219917,
'p': 0.7684210526315789,
'r': 0.8076595744680851},
{'a': 0.7230172927847347,
'f1': 0.6279535442531037,
'p': 0.5275908479138627,
'r': 0.7754698318496538},
{'a': 0.6940684223480187,
'f1': 0.6834733893557422,
'p': 0.5502255022550225,
'r': 0.9018817204301075},
{'a': 0.6265653869841922,
'f1': 0.6345187864175206,
'p': 0.48584615384615387,
'r': 0.9143022582513028},
{'a': 0.6097623966942148,
'f1': 0.5624094989863887,
'p': 0.4090143218197136,
'r': 0.8999073215940686},
{'a': 0.7384384384384385,
'f1': 0.6553225168183617,
'p': 0.553475935828877,
'r': 0.8031037827352085},
{'a': 0.5977851083883129,
'f1': 0.5124250214224507,
'p': 0.3676229508196721,
'r': 0.8454288407163054},
{'a': 0.7416363034117257,
'f1': 0.6842105263157894,
'p': 0.5588624338624338,
'r': 0.8820459290187892},
{'a': 0.7345368452204795,
'f1': 0.712592117910926,
'p': 0.5868073878627968,
'r': 0.9070146818923328},
{'a': 0.36462324393358875,
'f1': 0.40454817474566124,
'p': 0.2586404795306721,
'r': 0.9281464530892448},
{'a': 0.5604063701263042,
'f1': 0.4226469527587451,
'p': 0.28309178743961355,
'r': 0.833570412517781},
{'a': 0.5278008298755187,
'f1': 0.4557627929220469,
'p': 0.31123448726322667,
'r': 0.8508928571428571},
{'a': 0.8262844166903207,
'f1': 0.80875,
'p': 0.7398513436249285,
'r': 0.8917987594762233},
{'a': 0.5724090597117364,
'f1': 0.5379480840543882,
'p': 0.3837742504409171,
'r': 0.8991735537190083},
{'a': 0.7998363785110445,
'f1': 0.78475073313783,
'p': 0.6904024767801857,
'r': 0.9089673913043478},
{'a': 0.7134107027724049,
'f1': 0.6189455636519503,
'p': 0.4935064935064935,
'r': 0.8298850574712644}],
'thenation.com;2015': [{'a': 0.7011661807580175,
'f1': 0.7466007416563658,
'p': 0.631578947368421,
'r': 0.9128463476070529},
{'a': 0.6158984635938544,
'f1': 0.5607333842627961,
'p': 0.42305475504322765,
'r': 0.8312570781426953},
{'a': 0.6486733760292772,
'f1': 0.6437847866419295,
'p': 0.5090464547677261,
'r': 0.8755256518082423},
{'a': 0.7768453502312039,
'f1': 0.83955177933752,
'p': 0.7604282846308276,
'r': 0.9370533260032985},
{'a': 0.6675358539765319,
'f1': 0.6966452533904354,
'p': 0.567222006974041,
'r': 0.9025893958076449},
{'a': 0.6472923164162178,
'f1': 0.6589912280701754,
'p': 0.5306843267108168,
'r': 0.8691250903832248},
{'a': 0.6458094144661309,
'f1': 0.6523943661971832,
'p': 0.5220919747520288,
'r': 0.8693693693693694},
{'a': 0.543138866064092,
'f1': 0.3110285006195787,
'p': 0.19670846394984326,
'r': 0.742603550295858},
{'a': 0.6071055381400209,
'f1': 0.5534441805225654,
'p': 0.40853302162478083,
'r': 0.8576687116564418},
{'a': 0.6504384638645297,
'f1': 0.6545128511655709,
'p': 0.5179754020813624,
'r': 0.8887987012987013},
{'a': 0.5240253853127833,
'f1': 0.14634146341463414,
'p': 0.08272058823529412,
'r': 0.6338028169014085},
{'a': 0.6274137385248496,
'f1': 0.5923103567717354,
'p': 0.45943041375604515,
'r': 0.8333333333333334},
{'a': 0.650899593731863,
'f1': 0.6559908492993995,
'p': 0.5256645279560037,
'r': 0.8722433460076046},
{'a': 0.6512681159420289,
'f1': 0.6526315789473685,
'p': 0.514218009478673,
'r': 0.8930041152263375},
{'a': 0.6519756838905775,
'f1': 0.6560528687293481,
'p': 0.5182724252491694,
'r': 0.8936170212765957},
{'a': 0.5417523652817771,
'f1': 0.2967171717171717,
'p': 0.18905872888173772,
'r': 0.6891495601173021},
{'a': 0.7307525010874293,
'f1': 0.7867723045125732,
'p': 0.6813842482100239,
'r': 0.9307253463732681},
{'a': 0.7786984031334739,
'f1': 0.8467716699697507,
'p': 0.7636876763875823,
'r': 0.9501404494382022},
{'a': 0.7297186280550421,
'f1': 0.7874677002583979,
'p': 0.6924169270093723,
'r': 0.9127667540247099},
{'a': 0.7475834397227795,
'f1': 0.8104109589041095,
'p': 0.7160493827160493,
'r': 0.9334174818554749},
{'a': 0.6403210867551713,
'f1': 0.6274384393987849,
'p': 0.49520444220090865,
'r': 0.856020942408377},
{'a': 0.6655328798185941,
'f1': 0.6894736842105262,
'p': 0.561990561990562,
'r': 0.8917631041524847},
{'a': 0.7591199699135013,
'f1': 0.8191444303261329,
'p': 0.7310987903225806,
'r': 0.9313001605136436},
{'a': 0.5442651548190144,
'f1': 0.23218221895664953,
'p': 0.14044444444444446,
'r': 0.6694915254237288},
{'a': 0.612482853223594,
'f1': 0.5592823712948518,
'p': 0.41589327146171695,
'r': 0.8535714285714285}],
'www.cnn.com;2000': [{'a': 0.8130899937067338,
'f1': 0.7341092211280216,
'p': 0.68561872909699,
'r': 0.789980732177264},
{'a': 0.7033918691363964,
'f1': 0.7619231511874879,
'p': 0.6567909454061251,
'r': 0.9071264367816092},
{'a': 0.9567706842255941,
'f1': 0.9742628259757967,
'p': 0.9855172413793103,
'r': 0.963262554769127},
{'a': 0.7296494355317885,
'f1': 0.6033129904097646,
'p': 0.4798890429958391,
'r': 0.812206572769953},
{'a': 0.8298865910607072,
'f1': 0.7038327526132403,
'p': 0.62217659137577,
'r': 0.8101604278074866},
{'a': 0.8375254928619986,
'f1': 0.8686813186813187,
'p': 0.8187467633350596,
'r': 0.92510239906378},
{'a': 0.7469262295081968,
'f1': 0.77255985267035,
'p': 0.6549570647931303,
'r': 0.941638608305275},
{'a': 0.812,
'f1': 0.8061056105610561,
'p': 0.8196308724832215,
'r': 0.7930194805194806},
{'a': 0.7922141119221411,
'f1': 0.7634349030470916,
'p': 0.7638580931263859,
'r': 0.7630121816168328},
{'a': 0.8499701135684399,
'f1': 0.8163862472567666,
'p': 0.7994269340974212,
'r': 0.8340807174887892},
{'a': 0.7581291759465479,
'f1': 0.7517146776406034,
'p': 0.648776637726914,
'r': 0.8934782608695652},
{'a': 0.8777838131450298,
'f1': 0.8659916617033949,
'p': 0.8453488372093023,
'r': 0.8876678876678876},
{'a': 0.758496395468589,
'f1': 0.7159297395517868,
'p': 0.606776180698152,
'r': 0.8729689807976366},
{'a': 0.8103021297672115,
'f1': 0.8004168837936425,
'p': 0.7427466150870407,
'r': 0.8677966101694915},
{'a': 0.877246653919694,
'f1': 0.896551724137931,
'p': 0.8798228969006958,
'r': 0.9139290407358739},
{'a': 0.7289398280802293,
'f1': 0.6266771902131018,
'p': 0.5,
'r': 0.8393234672304439},
{'a': 0.7263533610945866,
'f1': 0.727810650887574,
'p': 0.5896452540747843,
'r': 0.9505409582689336},
{'a': 0.7041499330655957,
'f1': 0.44191919191919193,
'p': 0.30594405594405594,
'r': 0.7954545454545454},
{'a': 0.8116094986807388,
'f1': 0.7698259187620888,
'p': 0.766367137355584,
'r': 0.7733160621761658},
{'a': 0.8488805970149254,
'f1': 0.7996702390766693,
'p': 0.7601880877742947,
'r': 0.8434782608695652},
{'a': 0.8317631224764468,
'f1': 0.7093023255813954,
'p': 0.613682092555332,
'r': 0.8402203856749312},
{'a': 0.8600891861761427,
'f1': 0.8767795778105055,
'p': 0.8703703703703703,
'r': 0.8832838773491593},
{'a': 0.8866200967221923,
'f1': 0.9090909090909092,
'p': 0.9009393680614859,
'r': 0.9173913043478261},
{'a': 0.7409985597695631,
'f1': 0.7998516045260621,
'p': 0.7085113374958922,
'r': 0.9182282793867121},
{'a': 0.7255568138920347,
'f1': 0.7238890998860615,
'p': 0.6192332683560754,
'r': 0.8711151736745887}],
'www.cnn.com;2005': [{'a': 0.7973986993496749,
'f1': 0.6505608283002589,
'p': 0.5568685376661743,
'r': 0.7821576763485477},
{'a': 0.7814922480620154,
'f1': 0.6538756715272448,
'p': 0.534504391468005,
'r': 0.841897233201581},
{'a': 0.8120333772507685,
'f1': 0.7855711422845693,
'p': 0.7101449275362319,
'r': 0.8789237668161435},
{'a': 0.7939339875111507,
'f1': 0.7072243346007605,
'p': 0.6421173762945915,
'r': 0.7870239774330042},
{'a': 0.7925133689839572,
'f1': 0.6040816326530613,
'p': 0.4860426929392447,
'r': 0.7978436657681941},
{'a': 0.8149480415667466,
'f1': 0.7729279058361942,
'p': 0.701067615658363,
'r': 0.8612021857923498},
{'a': 0.7992213570634038,
'f1': 0.6518804243008679,
'p': 0.5425361155698234,
'r': 0.8164251207729468},
{'a': 0.7974481658692185,
'f1': 0.6186186186186187,
'p': 0.5132890365448505,
'r': 0.7783375314861462},
{'a': 0.8134087237479806,
'f1': 0.8023952095808382,
'p': 0.7397476340694006,
'r': 0.8766355140186916},
{'a': 0.8281767955801105,
'f1': 0.5576102418207681,
'p': 0.45794392523364486,
'r': 0.7127272727272728},
{'a': 0.8291413703382481,
'f1': 0.7895299145299146,
'p': 0.7147001934235977,
'r': 0.8818615751789977},
{'a': 0.8012170385395537,
'f1': 0.6512455516014235,
'p': 0.5414201183431953,
'r': 0.8169642857142857},
{'a': 0.7987890079180252,
'f1': 0.6940509915014165,
'p': 0.6041923551171393,
'r': 0.8153078202995009},
{'a': 0.8547993019197208,
'f1': 0.8256496227996648,
'p': 0.8047385620915033,
'r': 0.8476764199655766},
{'a': 0.8202293202293203,
'f1': 0.7898516036381045,
'p': 0.7313829787234043,
'r': 0.858480749219563},
{'a': 0.7980817768803634,
'f1': 0.6563573883161512,
'p': 0.5568513119533528,
'r': 0.799163179916318},
{'a': 0.8632313056954669,
'f1': 0.8067870826491517,
'p': 0.7543500511770727,
'r': 0.8670588235294118},
{'a': 0.7817047817047817,
'f1': 0.7172859450726979,
'p': 0.6195348837209302,
'r': 0.8516624040920716},
{'a': 0.7955215085444903,
'f1': 0.6320254506892895,
'p': 0.5173611111111112,
'r': 0.8119891008174387},
{'a': 0.7862723214285714,
'f1': 0.5379975874547648,
'p': 0.41838649155722324,
'r': 0.7533783783783784},
{'a': 0.8092676872155565,
'f1': 0.7634684453565932,
'p': 0.6914498141263941,
'r': 0.852233676975945},
{'a': 0.8084622383985441,
'f1': 0.7710712343665034,
'p': 0.6910331384015594,
'r': 0.8720787207872078},
{'a': 0.819971870604782,
'f1': 0.7408906882591093,
'p': 0.6428571428571429,
'r': 0.8742038216560509},
{'a': 0.8246376811594203,
'f1': 0.6657458563535911,
'p': 0.5751789976133651,
'r': 0.7901639344262295},
{'a': 0.8191964285714286,
'f1': 0.8329896907216495,
'p': 0.7816473189607518,
'r': 0.8915510718789408}],
'www.cnn.com;2010': [{'a': 0.7275031685678074,
'f1': 0.7248880358285349,
'p': 0.6343784994400896,
'r': 0.8455223880597015},
{'a': 0.6324081020255063,
'f1': 0.5346628679962013,
'p': 0.39900779588944013,
'r': 0.8100719424460432},
{'a': 0.7277505255781359,
'f1': 0.7082238077356365,
'p': 0.6224422442244224,
'r': 0.8214285714285714},
{'a': 0.6537997587454765,
'f1': 0.702127659574468,
'p': 0.5600165562913907,
'r': 0.9408901251738526},
{'a': 0.5586563307493541,
'f1': 0.14600000000000002,
'p': 0.09193954659949623,
'r': 0.35436893203883496},
{'a': 0.6614678899082569,
'f1': 0.4728571428571429,
'p': 0.338100102145046,
'r': 0.7862232779097387},
{'a': 0.45656706045865186,
'f1': 0.3919129082426127,
'p': 0.25237856785177765,
'r': 0.8765217391304347},
{'a': 0.6976923076923077,
'f1': 0.6330532212885154,
'p': 0.5191424196018377,
'r': 0.8110047846889952},
{'a': 0.6745749308026888,
'f1': 0.6304445442299056,
'p': 0.5254491017964071,
'r': 0.7878787878787878},
{'a': 0.7147385103011094,
'f1': 0.7341996455995274,
'p': 0.646049896049896,
'r': 0.8502051983584131},
{'a': 0.6349760139555168,
'f1': 0.39303843364757074,
'p': 0.26965174129353237,
'r': 0.7245989304812834},
{'a': 0.6150234741784038,
'f1': 0.5858585858585859,
'p': 0.4628307433851323,
'r': 0.7979724837074583},
{'a': 0.6288178224937119,
'f1': 0.5872952457051538,
'p': 0.4596622889305816,
'r': 0.8130530973451328},
{'a': 0.660952380952381,
'f1': 0.6959863364645602,
'p': 0.5705285264263213,
'r': 0.8921729611384783},
{'a': 0.21855983772819473,
'f1': 0.10666666666666667,
'p': 0.05723172628304821,
'r': 0.7829787234042553},
{'a': 0.6026184058529072,
'f1': 0.45454545454545453,
'p': 0.3225806451612903,
'r': 0.7692307692307693},
{'a': 0.6807069219440354,
'f1': 0.7090713902308106,
'p': 0.6051305542830967,
'r': 0.8561244329228775},
{'a': 0.605606258148631,
'f1': 0.5437405731523378,
'p': 0.41460609545715926,
'r': 0.7897042716319824},
{'a': 0.7437995397596523,
'f1': 0.7909015025041735,
'p': 0.7159047978843974,
'r': 0.8834498834498834},
{'a': 0.5363106014886341,
'f1': 0.41541973116916053,
'p': 0.28477051460361613,
'r': 0.767572633552015},
{'a': 0.11181766218919692,
'f1': 0.0687691961944715,
'p': 0.03584817244611059,
'r': 0.8422018348623853},
{'a': 0.6964824120603015,
'f1': 0.702950819672131,
'p': 0.6025857223159078,
'r': 0.8434303697875688},
{'a': 0.6620408163265306,
'f1': 0.5460526315789473,
'p': 0.4188393608074012,
'r': 0.784251968503937},
{'a': 0.7014111610006415,
'f1': 0.681491618200479,
'p': 0.5817757009345794,
'r': 0.8224607762180016},
{'a': 0.6079678607298292,
'f1': 0.5402434236356498,
'p': 0.4105011933174224,
'r': 0.7898966704936854}],
'www.cnn.com;2015': [{'a': 0.5461303017052908,
'f1': 0.6239130434782609,
'p': 0.47385800770500824,
'r': 0.9130434782608695},
{'a': 0.30808337569903405,
'f1': 0.3601316408086506,
'p': 0.22635933806146571,
'r': 0.8804597701149425},
{'a': 0.3871693866066404,
'f1': 0.33475870494807575,
'p': 0.21076923076923076,
'r': 0.8130563798219584},
{'a': 0.237528699645168,
'f1': 0.29844440176685233,
'p': 0.1796116504854369,
'r': 0.8819523269012486},
{'a': 0.28893905191873587,
'f1': 0.34402332361516036,
'p': 0.21354705274043434,
'r': 0.8843683083511777},
{'a': 0.487090367428004,
'f1': 0.5143394452280208,
'p': 0.3690958164642375,
'r': 0.8480620155038759},
{'a': 0.44073455759599334,
'f1': 0.4450579790171176,
'p': 0.30142109199700823,
'r': 0.8502109704641351},
{'a': 0.5386666666666666,
'f1': 0.5942142298670837,
'p': 0.4439252336448598,
'r': 0.8983451536643026},
{'a': 0.21576673866090712,
'f1': 0.2719069580910367,
'p': 0.1609304533586518,
'r': 0.875968992248062},
{'a': 0.10892214434551999,
'f1': 0.07365104371799922,
'p': 0.038901601830663615,
'r': 0.6900369003690037},
{'a': 0.30060493252675663,
'f1': 0.3346613545816733,
'p': 0.2074643249176729,
'r': 0.8649885583524027},
{'a': 0.2972493345164153,
'f1': 0.21739130434782608,
'p': 0.12746234067207415,
'r': 0.738255033557047},
{'a': 0.5377104377104377,
'f1': 0.551453773276707,
'p': 0.402479732951836,
'r': 0.8755186721991701},
{'a': 0.6343705799151343,
'f1': 0.731009365244537,
'p': 0.5955913522679102,
'r': 0.9461279461279462},
{'a': 0.4623908663532572,
'f1': 0.5923096511331806,
'p': 0.4293097083794758,
'r': 0.9548440065681445},
{'a': 0.5055798156234838,
'f1': 0.5628485628485629,
'p': 0.40923268870867124,
'r': 0.9010989010989011},
{'a': 0.20300230946882217,
'f1': 0.22848200312989042,
'p': 0.13204134366925063,
'r': 0.8474295190713101},
{'a': 0.40312876052948254,
'f1': 0.36,
'p': 0.23153526970954358,
'r': 0.808695652173913},
{'a': 0.5251872021783526,
'f1': 0.5427728613569321,
'p': 0.39372325249643364,
'r': 0.8734177215189873},
{'a': 0.29772374547335745,
'f1': 0.3710910354412787,
'p': 0.235657546337158,
'r': 0.8725490196078431},
{'a': 0.2304075235109718,
'f1': 0.24980901451489684,
'p': 0.14617791685292802,
'r': 0.8582677165354331},
{'a': 0.6290977208866687,
'f1': 0.7113702623906706,
'p': 0.5729941291585127,
'r': 0.9378603459320948},
{'a': 0.4444444444444444,
'f1': 0.4135188866799205,
'p': 0.27030539311241064,
'r': 0.879492600422833},
{'a': 0.5819144911085887,
'f1': 0.6747129820429792,
'p': 0.526896551724138,
'r': 0.9378068739770867},
{'a': 0.4675090252707581,
'f1': 0.4082246740220662,
'p': 0.27352150537634407,
'r': 0.8043478260869565}],
'www.esquire.com;2000': [{'a': 0.9610738255033557,
'f1': 0.9452830188679244,
'p': 0.9488636363636364,
'r': 0.9417293233082706},
{'a': 0.9625829812914907,
'f1': 0.9585006693440428,
'p': 0.9636608344549125,
'r': 0.9533954727030626},
{'a': 0.9491106719367589,
'f1': 0.9352608422375865,
'p': 0.9649805447470817,
'r': 0.9073170731707317},
{'a': 0.9604743083003953,
'f1': 0.9330357142857143,
'p': 0.9393258426966292,
'r': 0.926829268292683},
{'a': 0.9659798754192621,
'f1': 0.9649382716049383,
'p': 0.9731075697211156,
'r': 0.9569049951028403},
{'a': 0.9618320610687023,
'f1': 0.9403578528827038,
'p': 0.946,
'r': 0.9347826086956522},
{'a': 0.8702734147760326,
'f1': 0.7635206786850478,
'p': 0.7003891050583657,
'r': 0.8391608391608392},
{'a': 0.9437291368621841,
'f1': 0.9423264907135875,
'p': 0.9698189134808853,
'r': 0.9163498098859315},
{'a': 0.9411764705882353,
'f1': 0.8988988988988988,
'p': 0.9432773109243697,
'r': 0.858508604206501},
{'a': 0.954456415279138,
'f1': 0.9556931872320154,
'p': 0.9737864077669903,
'r': 0.9382600561272217},
{'a': 0.9400584795321637,
'f1': 0.9076576576576577,
'p': 0.9372093023255814,
'r': 0.8799126637554585},
{'a': 0.9430379746835443,
'f1': 0.9165964616680706,
'p': 0.9527145359019265,
'r': 0.8831168831168831},
{'a': 0.9679519278918377,
'f1': 0.96529284164859,
'p': 0.9705561613958561,
'r': 0.9600862998921251},
{'a': 0.9479048697621744,
'f1': 0.9159049360146252,
'p': 0.9488636363636364,
'r': 0.8851590106007067},
{'a': 0.9504480759093306,
'f1': 0.9304733727810651,
'p': 0.9588414634146342,
'r': 0.9037356321839081},
{'a': 0.9373088685015291,
'f1': 0.8918205804749341,
'p': 0.9234972677595629,
'r': 0.8622448979591837},
{'a': 0.9499749874937469,
'f1': 0.9528746465598492,
'p': 0.9674641148325359,
'r': 0.9387186629526463},
{'a': 0.950109649122807,
'f1': 0.9334308705193854,
'p': 0.9579579579579579,
'r': 0.9101283880171184},
{'a': 0.9045736871823828,
'f1': 0.842203548085901,
'p': 0.803921568627451,
'r': 0.884313725490196},
{'a': 0.8260325406758448,
'f1': 0.576219512195122,
'p': 0.4833759590792839,
'r': 0.7132075471698113},
{'a': 0.9553314121037464,
'f1': 0.9345991561181434,
'p': 0.9425531914893617,
'r': 0.9267782426778243},
{'a': 0.9464387464387465,
'f1': 0.9426829268292682,
'p': 0.9650436953807741,
'r': 0.9213349225268176},
{'a': 0.943345804382683,
'f1': 0.9206586826347306,
'p': 0.9564541213063764,
'r': 0.8874458874458875},
{'a': 0.1,
'f1': 0.09999999999999999,
'p': 0.05555555555555555,
'r': 0.5},
{'a': 0.9533295389869095,
'f1': 0.9243542435424354,
'p': 0.9488636363636364,
'r': 0.9010791366906474}],
'www.esquire.com;2005': [{'a': 0.9530398322851154,
'f1': 0.9464114832535886,
'p': 0.9611273080660836,
'r': 0.9321394910461829},
{'a': 0.9465422146796776,
'f1': 0.9371884346959123,
'p': 0.9572301425661914,
'r': 0.91796875},
{'a': 0.9585714285714285,
'f1': 0.9390329362298528,
'p': 0.938375350140056,
'r': 0.9396914446002805},
{'a': 0.9636363636363636,
'f1': 0.9542857142857142,
'p': 0.9553775743707094,
'r': 0.9531963470319634},
{'a': 0.9630901287553648,
'f1': 0.9570858283433133,
'p': 0.9609218436873748,
'r': 0.9532803180914513},
{'a': 0.9211159211159211,
'f1': 0.8790560471976402,
'p': 0.8989441930618401,
'r': 0.86002886002886},
{'a': 0.9058993847267462,
'f1': 0.9183417085427136,
'p': 0.8702380952380953,
'r': 0.9720744680851063},
{'a': 0.9181996086105675,
'f1': 0.9040844424047728,
'p': 0.8747779751332149,
'r': 0.9354226020892688},
{'a': 0.9591222030981067,
'f1': 0.9522373051784816,
'p': 0.958502024291498,
'r': 0.9460539460539461},
{'a': 0.9603463992707384,
'f1': 0.9502572898799314,
'p': 0.9432463110102156,
'r': 0.9573732718894009},
{'a': 0.9429404414827155,
'f1': 0.9302798982188295,
'p': 0.9185929648241206,
'r': 0.9422680412371134},
{'a': 0.920041004613019,
'f1': 0.8664383561643836,
'p': 0.840531561461794,
'r': 0.8939929328621908},
{'a': 0.9002638522427441,
'f1': 0.815968841285297,
'p': 0.7688073394495413,
'r': 0.8692946058091287},
{'a': 0.9523595505617978,
'f1': 0.9440928270042194,
'p': 0.9582441113490364,
'r': 0.9303534303534303},
{'a': 0.9564459930313589,
'f1': 0.9479166666666666,
'p': 0.9479166666666666,
'r': 0.9479166666666666},
{'a': 0.8827899298390425,
'f1': 0.8565656565656565,
'p': 0.7969924812030075,
'r': 0.925764192139738},
{'a': 0.9586449626044875,
'f1': 0.9499467518636849,
'p': 0.958109559613319,
'r': 0.941921858500528},
{'a': 0.9640317858636553,
'f1': 0.9595484477892757,
'p': 0.9622641509433962,
'r': 0.9568480300187617},
{'a': 0.9663256606990622,
'f1': 0.9613313754282917,
'p': 0.958984375,
'r': 0.9636898920510304},
{'a': 0.9699303263659699,
'f1': 0.9707142857142858,
'p': 0.9714081486776269,
'r': 0.9700214132762313},
{'a': 0.9461196243203164,
'f1': 0.920611798980335,
'p': 0.9390787518573551,
'r': 0.9028571428571428},
{'a': 0.948937908496732,
'f1': 0.943155979990905,
'p': 0.9308797127468582,
'r': 0.9557603686635945},
{'a': 0.9541052631578948,
'f1': 0.9472665699080792,
'p': 0.9616895874263262,
'r': 0.9332697807435653},
{'a': 0.9571852479864349,
'f1': 0.9505628976994616,
'p': 0.960435212660732,
'r': 0.9408914728682171},
{'a': 0.9658650116369278,
'f1': 0.9645732689210951,
'p': 0.9684721099434115,
'r': 0.9607056936647955}],
'www.esquire.com;2010': [{'a': 0.5643207012116525,
'f1': 0.4433465085638999,
'p': 0.30660592255125285,
'r': 0.8002378121284186},
{'a': 0.6595404595404596,
'f1': 0.711284310403253,
'p': 0.5828936406553735,
'r': 0.9122120817036071},
{'a': 0.4677680596047943,
'f1': 0.20742884708152434,
'p': 0.12092238470191226,
'r': 0.7288135593220338},
{'a': 0.5635330578512396,
'f1': 0.5412595005428882,
'p': 0.3940711462450593,
'r': 0.8639514731369151},
{'a': 0.5767780849459663,
'f1': 0.5699693564862105,
'p': 0.426279602750191,
'r': 0.8597842835130971},
{'a': 0.530896150113232,
'f1': 0.4304791830322074,
'p': 0.2860125260960334,
'r': 0.8698412698412699},
{'a': 0.4075716234652115,
'f1': 0.286652977412731,
'p': 0.1713303878252332,
'r': 0.8768844221105527},
{'a': 0.5689839572192513,
'f1': 0.5470213563132258,
'p': 0.3924731182795699,
'r': 0.9023485784919654},
{'a': 0.5669208519589798,
'f1': 0.5385261978145138,
'p': 0.3895419537900284,
'r': 0.8720508166969148},
{'a': 0.5872011251758087,
'f1': 0.6014935505770536,
'p': 0.4532742155525239,
'r': 0.8937457969065232},
{'a': 0.5289658906334597,
'f1': 0.45861854387056633,
'p': 0.3207136640557006,
'r': 0.8045851528384279},
{'a': 0.5566271700192891,
'f1': 0.5137503777576308,
'p': 0.3648068669527897,
'r': 0.8682328907048008},
{'a': 0.6042534531900899,
'f1': 0.6395046934291992,
'p': 0.49050245098039214,
'r': 0.9185312679288583},
{'a': 0.5003152585119798,
'f1': 0.31355565179731487,
'p': 0.19738276990185388,
'r': 0.7621052631578947},
{'a': 0.5922096657850445,
'f1': 0.5956127801621364,
'p': 0.45237232886635276,
'r': 0.8715980460572226},
{'a': 0.49784791965566716,
'f1': 0.2143658810325477,
'p': 0.12402597402597403,
'r': 0.7892561983471075},
{'a': 0.3755117231112765,
'f1': 0.17583497053045186,
'p': 0.0982436882546652,
'r': 0.8364485981308412},
{'a': 0.5544525547445256,
'f1': 0.5058290155440415,
'p': 0.35164340387212967,
'r': 0.9008073817762399},
{'a': 0.568724279835391,
'f1': 0.5549263873159682,
'p': 0.40312628547922663,
'r': 0.8900999091734787},
{'a': 0.6699975018735949,
'f1': 0.681763430498675,
'p': 0.5465430668211665,
'r': 0.9058898847631242},
{'a': 0.40389294403892945,
'f1': 0.26109435588108576,
'p': 0.15380710659898478,
'r': 0.8632478632478633},
{'a': 0.391304347826087,
'f1': 0.2546583850931677,
'p': 0.1490134994807892,
'r': 0.875},
{'a': 0.6918226600985221,
'f1': 0.7528445006321113,
'p': 0.629492600422833,
'r': 0.9363207547169812},
{'a': 0.5951573849878935,
'f1': 0.6032273374466066,
'p': 0.45785302593659943,
'r': 0.8838664812239221},
{'a': 0.630575117370892,
'f1': 0.6488145048814504,
'p': 0.5043365134431916,
'r': 0.9093041438623924}],
'www.esquire.com;2015': [{'a': 0.28155849110591824,
'f1': 0.01374795417348609,
'p': 0.006949500297835727,
'r': 0.6325301204819277},
{'a': 0.28711102754536055,
'f1': 0.03327383987761346,
'p': 0.017044341409260106,
'r': 0.696},
{'a': 0.27682545695615113,
'f1': 0.014328127016909773,
'p': 0.007239287810604579,
'r': 0.6894409937888198},
{'a': 0.46926977687626775,
'f1': 0.08112379280070238,
'p': 0.04307290695506247,
'r': 0.6957831325301205},
{'a': 0.3115534984047095,
'f1': 0.13151927437641722,
'p': 0.07138900855437258,
'r': 0.8339324227174695},
{'a': 0.3296091814111203,
'f1': 0.1882402484602832,
'p': 0.10571124512238382,
'r': 0.8583773403744599},
{'a': 0.2885415703320078,
'f1': 0.06056905605079986,
'p': 0.03155015584250366,
'r': 0.7549467275494672},
{'a': 0.28600444003589814,
'f1': 0.03177043300025621,
'p': 0.016255899318300997,
'r': 0.6966292134831461},
{'a': 0.3803981623277182,
'f1': 0.17495921696574226,
'p': 0.09766647694934548,
'r': 0.8387096774193549},
{'a': 0.480719397828233,
'f1': 0.5357822453876065,
'p': 0.37498552403011004,
'r': 0.9380069524913094},
{'a': 0.2905982905982906,
'f1': 0.05122118808170405,
'p': 0.026523482986156036,
'r': 0.7441016333938294},
{'a': 0.42642440556303274,
'f1': 0.22177133001927563,
'p': 0.12893712398254098,
'r': 0.7920289855072464},
{'a': 0.28823722302899707,
'f1': 0.04500314267756129,
'p': 0.023218107529671184,
'r': 0.7291242362525459},
{'a': 0.3103961736305388,
'f1': 0.11633420063602197,
'p': 0.06269084564092976,
'r': 0.8060897435897436},
{'a': 0.30111370823594114,
'f1': 0.08463893390959842,
'p': 0.04466265441875198,
'r': 0.8066361556064073},
{'a': 0.36065963688258146,
'f1': 0.0967741935483871,
'p': 0.05160673754629076,
'r': 0.7755834829443446},
{'a': 0.289927787677014,
'f1': 0.05231866825208085,
'p': 0.02706727967363854,
'r': 0.7798507462686567},
{'a': 0.28603752239087393,
'f1': 0.024475074069303104,
'p': 0.012450851900393184,
'r': 0.7142857142857143},
{'a': 0.3594932674687276,
'f1': 0.09215132693393563,
'p': 0.04895608351331893,
'r': 0.783109404990403},
{'a': 0.320497058048652,
'f1': 0.1641911963273022,
'p': 0.090838462917588,
'r': 0.8529741863075196},
{'a': 0.28706446607419944,
'f1': 0.023192887514495556,
'p': 0.011798636601992658,
'r': 0.6766917293233082},
{'a': 0.4193067197045035,
'f1': 0.3059682485779777,
'p': 0.18523848684210525,
'r': 0.878595806923452},
{'a': 0.33102908569192646,
'f1': 0.18479470198675496,
'p': 0.10366759793140344,
'r': 0.8499025341130604},
{'a': 0.32755466504050873,
'f1': 0.1720476241553148,
'p': 0.09607666966157533,
'r': 0.822142491030241},
{'a': 0.35135792460478316,
'f1': 0.058816609810610515,
'p': 0.03058103975535168,
'r': 0.7668711656441718},
{'a': 0.28490255928621744,
'f1': 0.034369055168040584,
'p': 0.01760998115537072,
'r': 0.7112860892388452}],
'www.forbes.com;2000': [{'a': 0.7603195739014648,
'f1': 0.7727272727272727,
'p': 0.6777408637873754,
'r': 0.8986784140969163},
{'a': 0.741304347826087,
'f1': 0.75564681724846,
'p': 0.6216216216216216,
'r': 0.9633507853403142},
{'a': 0.8787784356497351,
'f1': 0.9231073334651118,
'p': 0.8801356954391255,
'r': 0.9704904405652536},
{'a': 0.7747368421052632,
'f1': 0.7995003123048094,
'p': 0.6837606837606838,
'r': 0.9624060150375939},
{'a': 0.6669542709232097,
'f1': 0.5150753768844221,
'p': 0.36541889483065954,
'r': 0.8723404255319149},
{'a': 0.7552631578947369,
'f1': 0.757496740547588,
'p': 0.6579841449603624,
'r': 0.8924731182795699},
{'a': 0.8159443552701979,
'f1': 0.8610662358642972,
'p': 0.7781021897810219,
'r': 0.9638336347197106},
{'a': 0.717391304347826,
'f1': 0.6672550750220653,
'p': 0.5550660792951542,
'r': 0.8362831858407079},
{'a': 0.8219106957424714,
'f1': 0.8670027142303218,
'p': 0.7829131652661064,
'r': 0.9713292788879235},
{'a': 0.9711538461538461,
'f1': 0.9811202013845186,
'p': 0.9755944931163955,
'r': 0.9867088607594937},
{'a': 0.711376404494382,
'f1': 0.6888720666161998,
'p': 0.5963302752293578,
'r': 0.8154121863799283},
{'a': 0.880854252529037,
'f1': 0.9228155339805825,
'p': 0.8740229885057471,
'r': 0.9773778920308483},
{'a': 0.6704361873990307,
'f1': 0.5903614457831325,
'p': 0.49830508474576274,
'r': 0.7241379310344828},
{'a': 0.7071742313323572,
'f1': 0.661590524534687,
'p': 0.5634005763688761,
'r': 0.8012295081967213},
{'a': 0.6441837732160313,
'f1': 0.4468085106382979,
'p': 0.32450331125827814,
'r': 0.7170731707317073},
{'a': 0.7682789651293588,
'f1': 0.8140794223826715,
'p': 0.714172604908947,
'r': 0.9464847848898216},
{'a': 0.8058455114822547,
'f1': 0.8495145631067961,
'p': 0.7658643326039387,
'r': 0.9536784741144414},
{'a': 0.7821052631578947,
'f1': 0.8318440292445167,
'p': 0.7361610352264558,
'r': 0.9561157796451915},
{'a': 0.7606382978723404,
'f1': 0.7844598190526876,
'p': 0.7012369172216937,
'r': 0.8900966183574879},
{'a': 0.6845637583892618,
'f1': 0.36199095022624433,
'p': 0.23904382470119523,
'r': 0.7453416149068323},
{'a': 0.9127272727272727,
'f1': 0.84,
'p': 0.7777777777777778,
'r': 0.9130434782608695},
{'a': 0.7254335260115607,
'f1': 0.6900489396411094,
'p': 0.5834482758620689,
'r': 0.844311377245509},
{'a': 0.8086194302410519,
'f1': 0.8335451080050825,
'p': 0.7446083995459705,
'r': 0.9466089466089466},
{'a': 0.7429359062715368,
'f1': 0.7534699272967614,
'p': 0.641169853768279,
'r': 0.9134615384615384},
{'a': 0.8815298507462687,
'f1': 0.9247778874629812,
'p': 0.8794592564776568,
'r': 0.9750208159866778},
{'a': 0.7494922139471902,
'f1': 0.7787081339712919,
'p': 0.6636085626911316,
'r': 0.9421128798842258},
{'a': 0.733142037302726,
'f1': 0.7138461538461538,
'p': 0.5895806861499364,
'r': 0.9044834307992202},
{'a': 0.9517426273458445,
'f1': 0.9647058823529412,
'p': 0.9669811320754716,
'r': 0.9624413145539906}],
'www.forbes.com;2005': [{'a': 0.922463768115942,
'f1': 0.5868725868725868,
'p': 0.4935064935064935,
'r': 0.7238095238095238},
{'a': 0.893611404435058,
'f1': 0.8388644542183126,
'p': 0.8026013771996939,
'r': 0.8785594639865997},
{'a': 0.923974540311174,
'f1': 0.7174770039421814,
'p': 0.6807980049875312,
'r': 0.7583333333333333},
{'a': 0.8789716926632004,
'f1': 0.7067879636109169,
'p': 0.6242274412855378,
'r': 0.8145161290322581},
{'a': 0.9320754716981132,
'f1': 0.8411764705882353,
'p': 0.8125,
'r': 0.8719512195121951},
{'a': 0.9328429804924848,
'f1': 0.8467153284671532,
'p': 0.8215297450424929,
'r': 0.8734939759036144},
{'a': 0.9169241331960178,
'f1': 0.6620111731843575,
'p': 0.5895522388059702,
'r': 0.7547770700636943},
{'a': 0.9158091674462114,
'f1': 0.7926267281105991,
'p': 0.7510917030567685,
'r': 0.8390243902439024},
{'a': 0.9332394366197183,
'f1': 0.8892005610098177,
'p': 0.879740980573543,
'r': 0.8988657844990549},
{'a': 0.8787878787878788,
'f1': 0.0125,
'p': 0.0078125,
'r': 0.03125},
{'a': 0.9221065909807632,
'f1': 0.6535764375876578,
'p': 0.6348773841961853,
'r': 0.6734104046242775},
{'a': 0.9324227174694465,
'f1': 0.7062499999999999,
'p': 0.6420454545454546,
'r': 0.7847222222222222},
{'a': 0.9324990519529769,
'f1': 0.5972850678733032,
'p': 0.5116279069767442,
'r': 0.717391304347826},
{'a': 0.897003745318352,
'f1': 0.7887323943661972,
'p': 0.7368421052631579,
'r': 0.8484848484848485},
{'a': 0.923697270471464,
'f1': 0.8282122905027932,
'p': 0.8179310344827586,
'r': 0.8387553041018387},
{'a': 0.9231056364315043,
'f1': 0.6199261992619928,
'p': 0.56,
'r': 0.6942148760330579},
{'a': 0.9271175311884439,
'f1': 0.8388969521044993,
'p': 0.8210227272727273,
'r': 0.857566765578635},
{'a': 0.9274785801713586,
'f1': 0.8657223796033995,
'p': 0.854586129753915,
'r': 0.8771526980482205},
{'a': 0.9183731513083049,
'f1': 0.8444444444444444,
'p': 0.8358369098712446,
'r': 0.8532311062431545},
{'a': 0.9288114879315612,
'f1': 0.8535512256442489,
'p': 0.8382716049382716,
'r': 0.8693982074263764},
{'a': 0.8757146408153119,
'f1': 0.7795414462081128,
'p': 0.7163695299837926,
'r': 0.8549323017408124},
{'a': 0.918732350172576,
'f1': 0.778063410454156,
'p': 0.7566666666666667,
'r': 0.800705467372134},
{'a': 0.918977202711029,
'f1': 0.8101083032490974,
'p': 0.7912552891396333,
'r': 0.8298816568047337},
{'a': 0.926164136866478,
'f1': 0.8929503916449086,
'p': 0.9047619047619048,
'r': 0.8814432989690721},
{'a': 0.9195926025194318,
'f1': 0.8584905660377359,
'p': 0.8363970588235294,
'r': 0.8817829457364341}],
'www.forbes.com;2010': [{'a': 0.803030303030303,
'f1': 0.4956896551724137,
'p': 0.4121863799283154,
'r': 0.6216216216216216},
{'a': 0.8355041003376749,
'f1': 0.8311045071817731,
'p': 0.8121974830590513,
'r': 0.8509127789046653},
{'a': 0.834983498349835,
'f1': 0.7706422018348623,
'p': 0.6666666666666666,
'r': 0.9130434782608695},
{'a': 0.8385175380542687,
'f1': 0.7881944444444444,
'p': 0.6941896024464832,
'r': 0.9116465863453815},
{'a': 0.8763654419066534,
'f1': 0.8819345661450925,
'p': 0.8355795148247979,
'r': 0.9337349397590361},
{'a': 0.8413705583756346,
'f1': 0.7990353697749196,
'p': 0.7213352685050798,
'r': 0.8954954954954955},
{'a': 0.8154583582983823,
'f1': 0.7220216606498195,
'p': 0.6279434850863422,
'r': 0.8492569002123143},
{'a': 0.8404907975460123,
'f1': 0.8281249999999999,
'p': 0.7291005291005291,
'r': 0.9582753824756607},
{'a': 0.8345111896348646,
'f1': 0.8068728522336769,
'p': 0.7264851485148515,
'r': 0.9072642967542504},
{'a': 0.8261648745519713,
'f1': 0.742249778565102,
'p': 0.6506211180124224,
'r': 0.8639175257731959},
{'a': 0.8678511937812327,
'f1': 0.8598351001177856,
'p': 0.7883369330453563,
'r': 0.9455958549222798},
{'a': 0.8579756226815051,
'f1': 0.8538713195201745,
'p': 0.7997957099080695,
'r': 0.9157894736842105},
{'a': 0.8235294117647058,
'f1': 0.7715481171548119,
'p': 0.6749633967789166,
'r': 0.900390625},
{'a': 0.8467322151532678,
'f1': 0.8222669349429913,
'p': 0.7512254901960784,
'r': 0.9081481481481481},
{'a': 0.822998193859121,
'f1': 0.7762557077625571,
'p': 0.6737120211360634,
'r': 0.9156193895870736},
{'a': 0.8538390379278445,
'f1': 0.8438735177865613,
'p': 0.7870967741935484,
'r': 0.9094781682641108},
{'a': 0.7989521938441388,
'f1': 0.7039537126325941,
'p': 0.6684981684981685,
'r': 0.7433808553971487},
{'a': 0.8128772635814889,
'f1': 0.7360454115421002,
'p': 0.7021660649819494,
'r': 0.7733598409542743},
{'a': 0.8265830005704506,
'f1': 0.7342657342657343,
'p': 0.6552262090483619,
'r': 0.8349900596421471},
{'a': 0.8058429701765064,
'f1': 0.7498039215686273,
'p': 0.6297760210803689,
'r': 0.9263565891472868},
{'a': 0.8155028827674567,
'f1': 0.7587939698492463,
'p': 0.7365853658536585,
'r': 0.7823834196891192},
{'a': 0.5447540011855364,
'f1': 0.1812366737739872,
'p': 0.10278113663845223,
'r': 0.7657657657657657},
{'a': 0.5952802359882006,
'f1': 0.3466666666666667,
'p': 0.21487603305785125,
'r': 0.896551724137931},
{'a': 0.818087318087318,
'f1': 0.8064159292035399,
'p': 0.7098344693281402,
'r': 0.9334186939820742},
{'a': 0.8540609137055838,
'f1': 0.8275862068965517,
'p': 0.739946380697051,
'r': 0.9387755102040817}],
'www.forbes.com;2015': [{'a': 0.6009918845807033,
'f1': 0.5982750794371311,
'p': 0.45354439091534754,
'r': 0.8786666666666667},
{'a': 0.5228988424760946,
'f1': 0.4397163120567376,
'p': 0.30194805194805197,
'r': 0.808695652173913},
{'a': 0.5684647302904564,
'f1': 0.4474616292798111,
'p': 0.31530782029950083,
'r': 0.7703252032520326},
{'a': 0.5637982195845698,
'f1': 0.486013986013986,
'p': 0.347789824854045,
'r': 0.8065764023210832},
{'a': 0.5737037037037037,
'f1': 0.5218113834648941,
'p': 0.38456827924066134,
'r': 0.8113695090439277},
{'a': 0.5261813537675607,
'f1': 0.5360566902876198,
'p': 0.3793510324483776,
'r': 0.9133522727272727},
{'a': 0.6267262388302194,
'f1': 0.6455842653297338,
'p': 0.5097442143727162,
'r': 0.8801261829652997},
{'a': 0.6165389527458492,
'f1': 0.5712245626561941,
'p': 0.45454545454545453,
'r': 0.7684918347742555},
{'a': 0.43635551585529253,
'f1': 0.3984747378455672,
'p': 0.2570725707257073,
'r': 0.885593220338983},
{'a': 0.3153623188405797,
'f1': 0.2385557704706641,
'p': 0.13941220798794274,
'r': 0.8258928571428571},
{'a': 0.5164212910532276,
'f1': 0.3200636942675159,
'p': 0.20447609359104782,
'r': 0.7362637362637363},
{'a': 0.56483191725157,
'f1': 0.602832097100472,
'p': 0.4554253693326541,
'r': 0.8913260219341974},
{'a': 0.608122179798681,
'f1': 0.6417010472865756,
'p': 0.5017369727047146,
'r': 0.8899647887323944},
{'a': 0.5861520095503382,
'f1': 0.607250755287009,
'p': 0.4612736660929432,
'r': 0.8883977900552487},
{'a': 0.5475171232876712,
'f1': 0.5086006508600651,
'p': 0.36321381142098275,
'r': 0.8480620155038759},
{'a': 0.5645315487571702,
'f1': 0.48791455874086564,
'p': 0.3472,
'r': 0.8204158790170132},
{'a': 0.5972944849115505,
'f1': 0.5044814340588989,
'p': 0.3835171966255678,
'r': 0.7369077306733167},
{'a': 0.5224932249322494,
'f1': 0.36297903109182933,
'p': 0.23882017126546146,
'r': 0.7560240963855421},
{'a': 0.49003466204506063,
'f1': 0.46862302483069984,
'p': 0.31860036832412525,
'r': 0.8856655290102389},
{'a': 0.5722679200940071,
'f1': 0.5997067448680351,
'p': 0.4487109160724081,
'r': 0.9038674033149171},
{'a': 0.41139240506329117,
'f1': 0.26235509456985967,
'p': 0.15774027879677183,
'r': 0.7789855072463768},
{'a': 0.5893101873001371,
'f1': 0.5466464952092789,
'p': 0.40813253012048195,
'r': 0.8274809160305343},
{'a': 0.36214185063410054,
'f1': 0.09345794392523364,
'p': 0.050468637346791634,
'r': 0.6306306306306306},
{'a': 0.5615592435353145,
'f1': 0.5968772178850248,
'p': 0.44263157894736843,
'r': 0.9161220043572985},
{'a': 0.5688073394495413,
'f1': 0.531405782652044,
'p': 0.3867924528301887,
'r': 0.8487261146496815}],
'www.foxnews.com;2000': [{'a': 0.9518828451882845,
'f1': 0.9187279151943463,
'p': 0.9285714285714286,
'r': 0.9090909090909091},
{'a': 0.960635359116022,
'f1': 0.8800000000000001,
'p': 0.9330357142857143,
'r': 0.8326693227091634},
{'a': 0.9295958279009127,
'f1': 0.9021739130434783,
'p': 0.8691099476439791,
'r': 0.9378531073446328},
{'a': 0.9138438880706922,
'f1': 0.8956289027653881,
'p': 0.899641577060932,
'r': 0.8916518650088809},
{'a': 0.9568106312292359,
'f1': 0.8987012987012987,
'p': 0.9301075268817204,
'r': 0.8693467336683417},
{'a': 0.9374437443744374,
'f1': 0.9506567270145545,
'p': 0.926002766251729,
'r': 0.9766593727206418},
{'a': 0.7709205020920502,
'f1': 0.7818725099601594,
'p': 0.6618887015177066,
'r': 0.9549878345498783},
{'a': 0.9540372670807453,
'f1': 0.9357638888888891,
'p': 0.9373913043478261,
'r': 0.9341421143847487},
{'a': 0.9479315263908702,
'f1': 0.9557575757575759,
'p': 0.9651162790697675,
'r': 0.946578631452581},
{'a': 0.9376609994848016,
'f1': 0.8826382153249273,
'p': 0.9191919191919192,
'r': 0.8488805970149254},
{'a': 0.966804979253112,
'f1': 0.9065420560747663,
'p': 0.9326923076923077,
'r': 0.8818181818181818},
{'a': 0.8981636060100167,
'f1': 0.8539505187549881,
'p': 0.816793893129771,
'r': 0.8946488294314381},
{'a': 0.9306184012066365,
'f1': 0.8696883852691218,
'p': 0.8319783197831978,
'r': 0.9109792284866469},
{'a': 0.9382022471910112,
'f1': 0.9022222222222221,
'p': 0.8638297872340426,
'r': 0.9441860465116279},
{'a': 0.2631578947368421,
'f1': 0.3,
'p': 0.17647058823529413,
'r': 1.0},
{'a': 0.9135060129509713,
'f1': 0.9034589571502323,
'p': 0.8901322482197355,
'r': 0.9171907756813418},
{'a': 0.9339651482726995,
'f1': 0.9274680993955676,
'p': 0.933739012846518,
'r': 0.9212808539026017},
{'a': 0.9387078961899503,
'f1': 0.9285254346426272,
'p': 0.9327296248382924,
'r': 0.9243589743589744},
{'a': 0.933705512909979,
'f1': 0.8966267682263331,
'p': 0.865546218487395,
'r': 0.9300225733634312},
{'a': 0.9861636951882701,
'f1': 0.9925436806766332,
'p': 0.9959797498511018,
'r': 0.9891312384473198},
{'a': 0.9393139841688655,
'f1': 0.936986301369863,
'p': 0.9173819742489271,
'r': 0.9574468085106383},
{'a': 0.8996683250414593,
'f1': 0.9202373104812129,
'p': 0.8914431673052363,
'r': 0.9509536784741145},
{'a': 0.9659790083242852,
'f1': 0.9745533297238765,
'p': 0.9787928221859706,
'r': 0.9703504043126685},
{'a': 0.949358059914408,
'f1': 0.9095541401273886,
'p': 0.9037974683544304,
'r': 0.9153846153846154},
{'a': 0.9705240174672489,
'f1': 0.9608695652173913,
'p': 0.9822222222222222,
'r': 0.9404255319148936}],
'www.foxnews.com;2005': [{'a': 0.6034149484536082,
'f1': 0.6691749529696318,
'p': 0.5075417855686915,
'r': 0.9818611987381703},
{'a': 0.442833607907743,
'f1': 0.45749117741418033,
'p': 0.30135249366018596,
'r': 0.9494007989347537},
{'a': 0.458528951486698,
'f1': 0.4917743830787309,
'p': 0.33466613354658137,
'r': 0.9269102990033222},
{'a': 0.4705693148922483,
'f1': 0.5036188178528348,
'p': 0.3471933471933472,
'r': 0.9165751920965971},
{'a': 0.4147383410466358,
'f1': 0.3946980854197349,
'p': 0.24907063197026022,
'r': 0.950354609929078},
{'a': 0.3608128834355828,
'f1': 0.2672527472527472,
'p': 0.15565796210957503,
'r': 0.9440993788819876},
{'a': 0.551033386327504,
'f1': 0.6097291321171918,
'p': 0.44565656565656564,
'r': 0.9650043744531933},
{'a': 0.599594868332208,
'f1': 0.3762272089761571,
'p': 0.24183006535947713,
'r': 0.8468823993685872},
{'a': 0.7557954127315099,
'f1': 0.8411140371877743,
'p': 0.7352120535714286,
'r': 0.9826589595375722},
{'a': 0.5216294160057678,
'f1': 0.30194634402945814,
'p': 0.1810725552050473,
'r': 0.9082278481012658},
{'a': 0.5412064570943076,
'f1': 0.55,
'p': 0.38573933372296904,
'r': 0.9579100145137881},
{'a': 0.45275779376498804,
'f1': 0.3491158014831717,
'p': 0.21549295774647886,
'r': 0.918918918918919},
{'a': 0.46584641493423845,
'f1': 0.4211494252873563,
'p': 0.27213309566250743,
'r': 0.9308943089430894},
{'a': 0.3951965065502183,
'f1': 0.29228410832907514,
'p': 0.17470983506414173,
'r': 0.89375},
{'a': 0.47703180212014135,
'f1': 0.5151876116736153,
'p': 0.35262943334692215,
'r': 0.9558011049723757},
{'a': 0.456710653363373,
'f1': 0.48661800486618007,
'p': 0.3270645952575634,
'r': 0.9501187648456056},
{'a': 0.46838258659040355,
'f1': 0.5013412816691505,
'p': 0.33966074313408723,
'r': 0.9567690557451649},
{'a': 0.6817651632970451,
'f1': 0.7405294024409574,
'p': 0.5974424552429668,
'r': 0.9737390579408086},
{'a': 0.39481946624803765,
'f1': 0.3248686514886165,
'p': 0.19619249074563722,
'r': 0.9440203562340967},
{'a': 0.40449775112443775,
'f1': 0.41895845523698066,
'p': 0.26957831325301207,
'r': 0.9396325459317585},
{'a': 0.6685860524632118,
'f1': 0.7033218785796106,
'p': 0.5576748410535877,
'r': 0.951937984496124},
{'a': 0.46245186136071886,
'f1': 0.491962390051562,
'p': 0.33183306055646483,
'r': 0.9507620164126612},
{'a': 0.401333737496211,
'f1': 0.408859622867405,
'p': 0.2614854517611026,
'r': 0.9368998628257887},
{'a': 0.5186202686202687,
'f1': 0.5137218624730188,
'p': 0.35192226446979297,
'r': 0.9509132420091324},
{'a': 0.46610716591349255,
'f1': 0.4969586374695864,
'p': 0.3362139917695473,
'r': 0.9522144522144522}],
'www.foxnews.com;2010': [{'a': 0.7668161434977578,
'f1': 0.3764988009592326,
'p': 0.24881141045958796,
'r': 0.7733990147783252},
{'a': 0.7757424368581738,
'f1': 0.6937073540561032,
'p': 0.5988219895287958,
'r': 0.8243243243243243},
{'a': 0.2345960748516659,
'f1': 0.17103311913000496,
'p': 0.09495060373216246,
'r': 0.8606965174129353},
{'a': 0.7676311030741411,
'f1': 0.32189973614775724,
'p': 0.20854700854700856,
'r': 0.7052023121387283},
{'a': 0.7303617099013519,
'f1': 0.5472392638036809,
'p': 0.40879926672777267,
'r': 0.8274582560296846},
{'a': 0.7756373937677054,
'f1': 0.6793522267206478,
'p': 0.5883590462833099,
'r': 0.803639846743295},
{'a': 0.7619047619047619,
'f1': 0.5780474351902922,
'p': 0.45565217391304347,
'r': 0.7903469079939668},
{'a': 0.7622270070747462,
'f1': 0.6082108464267614,
'p': 0.5050505050505051,
'r': 0.7643312101910829},
{'a': 0.7117411850236278,
'f1': 0.507147296457427,
'p': 0.36298932384341637,
'r': 0.8412371134020619},
{'a': 0.7625284738041003,
'f1': 0.6472081218274112,
'p': 0.5379746835443038,
'r': 0.8121019108280255},
{'a': 0.7522093813732155,
'f1': 0.4774193548387096,
'p': 0.3501577287066246,
'r': 0.75},
{'a': 0.760662671836447,
'f1': 0.4270042194092827,
'p': 0.3092909535452323,
'r': 0.6893732970027248},
{'a': 0.749707145646232,
'f1': 0.4784377542717656,
'p': 0.35336538461538464,
'r': 0.7405541561712846},
{'a': 0.8027233477250083,
'f1': 0.7522935779816513,
'p': 0.6424501424501424,
'r': 0.9074446680080482},
{'a': 0.7579972183588317,
'f1': 0.6481294236602629,
'p': 0.5136217948717948,
'r': 0.8780821917808219},
{'a': 0.7645959831854273,
'f1': 0.3471502590673575,
'p': 0.22521008403361345,
'r': 0.7570621468926554},
{'a': 0.7599640395564878,
'f1': 0.607545320921117,
'p': 0.5,
'r': 0.7740324594257179},
{'a': 0.811549368393916,
'f1': 0.8121305576972501,
'p': 0.7301293900184843,
'r': 0.9148812970469021},
{'a': 0.7687253613666228,
'f1': 0.38028169014084506,
'p': 0.2583732057416268,
'r': 0.72},
{'a': 0.7672496025437202,
'f1': 0.7031630170316301,
'p': 0.5776149233844103,
'r': 0.8984455958549222},
{'a': 0.7780074410913601,
'f1': 0.5251989389920424,
'p': 0.3907894736842105,
'r': 0.8005390835579514},
{'a': 0.7532252729077076,
'f1': 0.5155844155844156,
'p': 0.3899803536345776,
'r': 0.7605363984674329},
{'a': 0.7881653607133208,
'f1': 0.7731481481481481,
'p': 0.6816326530612244,
'r': 0.893048128342246},
{'a': 0.7628689087165408,
'f1': 0.46805234795996925,
'p': 0.3370288248337029,
'r': 0.7657430730478589},
{'a': 0.7885294117647059,
'f1': 0.7472759226713532,
'p': 0.6545566502463054,
'r': 0.8705978705978706}],
'www.foxnews.com;2015': [{'a': 0.8563569682151589,
'f1': 0.8865282472235635,
'p': 0.8652214891611687,
'r': 0.9089108910891089},
{'a': 0.8018691588785046,
'f1': 0.7188328912466844,
'p': 0.6878172588832487,
'r': 0.7527777777777778},
{'a': 0.8519888991674376,
'f1': 0.6444444444444445,
'p': 0.6223175965665236,
'r': 0.6682027649769585},
{'a': 0.7843719090009891,
'f1': 0.6812865497076024,
'p': 0.589873417721519,
'r': 0.8062283737024222},
{'a': 0.8609422492401215,
'f1': 0.8042780748663102,
'p': 0.7752577319587629,
'r': 0.8355555555555556},
{'a': 0.8248520710059172,
'f1': 0.672566371681416,
'p': 0.5984251968503937,
'r': 0.7676767676767676},
{'a': 0.826963906581741,
'f1': 0.7433070866141732,
'p': 0.6685552407932012,
'r': 0.8368794326241135},
{'a': 0.8463476070528967,
'f1': 0.7328467153284672,
'p': 0.7011173184357542,
'r': 0.7675840978593272},
{'a': 0.8742857142857143,
'f1': 0.8811524609843938,
'p': 0.8706998813760379,
'r': 0.8918590522478737},
{'a': 0.8710010319917441,
'f1': 0.5954692556634303,
'p': 0.5227272727272727,
'r': 0.6917293233082706},
{'a': 0.8536170212765958,
'f1': 0.8093126385809313,
'p': 0.8039647577092511,
'r': 0.8147321428571429},
{'a': 0.8363201911589009,
'f1': 0.766609880749574,
'p': 0.7009345794392523,
'r': 0.8458646616541353},
{'a': 0.8519900497512438,
'f1': 0.8344923504867872,
'p': 0.8075370121130552,
'r': 0.8633093525179856},
{'a': 0.8399339933993399,
'f1': 0.8283185840707965,
'p': 0.7878787878787878,
'r': 0.8731343283582089},
{'a': 0.8649334178820546,
'f1': 0.8924785461887934,
'p': 0.8700787401574803,
'r': 0.9160621761658031},
{'a': 0.8697394789579158,
'f1': 0.8959167333867094,
'p': 0.8952,
'r': 0.8966346153846154},
{'a': 0.7922077922077922,
'f1': 0.6363636363636364,
'p': 0.5645161290322581,
'r': 0.7291666666666666},
{'a': 0.8699234844025897,
'f1': 0.9002257336343116,
'p': 0.8815207780725022,
'r': 0.9197416974169742},
{'a': 0.853763440860215,
'f1': 0.84012539184953,
'p': 0.8271604938271605,
'r': 0.8535031847133758},
{'a': 0.8404864091559371,
'f1': 0.8631062001227747,
'p': 0.8155452436194895,
'r': 0.9165580182529335},
{'a': 0.8048780487804879,
'f1': 0.8079999999999998,
'p': 0.7917133258678611,
'r': 0.8249708284714119},
{'a': 0.8483572030328559,
'f1': 0.8369565217391305,
'p': 0.8048780487804879,
'r': 0.8716981132075472},
{'a': 0.8672086720867209,
'f1': 0.8122605363984674,
'p': 0.8153846153846154,
'r': 0.8091603053435115},
{'a': 0.8849701573521432,
'f1': 0.8921668362156663,
'p': 0.8921668362156663,
'r': 0.8921668362156663},
{'a': 0.7781094527363184,
'f1': 0.6836879432624113,
'p': 0.5863746958637469,
'r': 0.8197278911564626},
{'a': 0.8762641284949435,
'f1': 0.8725490196078431,
'p': 0.8651275820170109,
'r': 0.8800988875154512}],
'www.latimes.com;2000': [{'a': 0.8859138533178114,
'f1': 0.9079812206572769,
'p': 0.8719567177637512,
'r': 0.9471106758080313},
{'a': 0.9060402684563759,
'f1': 0.9296754250386399,
'p': 0.9011235955056179,
'r': 0.960095770151636},
{'a': 0.8181818181818182,
'f1': 0.7275541795665633,
'p': 0.6167979002624672,
'r': 0.8867924528301887},
{'a': 0.8636176349402555,
'f1': 0.8996665656259473,
'p': 0.8436611711199545,
'r': 0.9636363636363636},
{'a': 0.9081803005008348,
'f1': 0.9397590361445783,
'p': 0.9081287044877223,
'r': 0.9736722650930549},
{'a': 0.8790149892933619,
'f1': 0.9011373578302712,
'p': 0.865546218487395,
'r': 0.9397810218978102},
{'a': 0.8671875,
'f1': 0.8919262555626193,
'p': 0.8565323565323565,
'r': 0.9303713527851459},
{'a': 0.8974439886399496,
'f1': 0.8914132976946207,
'p': 0.8567758509955041,
'r': 0.9289693593314763},
{'a': 0.906754772393539,
'f1': 0.9151069518716578,
'p': 0.8912760416666666,
'r': 0.9402472527472527},
{'a': 0.864039408866995,
'f1': 0.8217054263565892,
'p': 0.7718446601941747,
'r': 0.8784530386740331},
{'a': 0.95949263502455,
'f1': 0.9440993788819876,
'p': 0.9675925925925926,
'r': 0.9217199558985667},
{'a': 0.8910433979686058,
'f1': 0.9293695131683959,
'p': 0.8885921404044258,
'r': 0.9740694270179842},
{'a': 0.8336025848142165,
'f1': 0.851227732306211,
'p': 0.7864768683274022,
'r': 0.9275970619097587},
{'a': 0.8811685748124753,
'f1': 0.9156153630501823,
'p': 0.8850948509485095,
'r': 0.9483159117305459},
{'a': 0.879980563654033,
'f1': 0.9096892138939672,
'p': 0.8723702664796634,
'r': 0.9503437738731857},
{'a': 0.8948170731707317,
'f1': 0.9210827296988181,
'p': 0.884981684981685,
'r': 0.9602543720190779},
{'a': 0.8715647784632642,
'f1': 0.9174477289113193,
'p': 0.8724717175179979,
'r': 0.9673128088179399},
{'a': 0.8863366336633663,
'f1': 0.9081011847582452,
'p': 0.8807453416149068,
'r': 0.9372108393919365},
{'a': 0.8540250447227191,
'f1': 0.9017341040462428,
'p': 0.8482102401449932,
'r': 0.9624678663239075},
{'a': 0.8820047355958959,
'f1': 0.9161290322580645,
'p': 0.8850948509485095,
'r': 0.9494186046511628},
{'a': 0.875943000838223,
'f1': 0.9118522930315663,
'p': 0.870380898237635,
'r': 0.957473420888055},
{'a': 0.8548465660009742,
'f1': 0.8876319758672699,
'p': 0.8306280875088214,
'r': 0.9530364372469635},
{'a': 0.867056856187291,
'f1': 0.8451801363193768,
'p': 0.8097014925373134,
'r': 0.8839103869653768},
{'a': 0.8775137111517367,
'f1': 0.9103678929765886,
'p': 0.8635786802030457,
'r': 0.9625176803394625},
{'a': 0.9000886786875554,
'f1': 0.9353481254781943,
'p': 0.9015486725663717,
'r': 0.9717806041335453}],
'www.latimes.com;2005': [{'a': 0.7422196124486201,
'f1': 0.7298461538461539,
'p': 0.6161038961038962,
'r': 0.8950943396226415},
{'a': 0.6051838456901748,
'f1': 0.4910644910644911,
'p': 0.3484013230429989,
'r': 0.8315789473684211},
{'a': 0.7498252969951084,
'f1': 0.7250384024577572,
'p': 0.6059050064184852,
'r': 0.9024856596558317},
{'a': 0.7478488589599701,
'f1': 0.7092320966350302,
'p': 0.5796897038081805,
'r': 0.9133333333333333},
{'a': 0.779495990836197,
'f1': 0.7187728268809349,
'p': 0.5992691839220463,
'r': 0.8978102189781022},
{'a': 0.9032732622287606,
'f1': 0.927488282326992,
'p': 0.9241758241758242,
'r': 0.9308245711123408},
{'a': 0.8685015290519877,
'f1': 0.7754569190600522,
'p': 0.7156626506024096,
'r': 0.8461538461538461},
{'a': 0.7058096415327565,
'f1': 0.7202194357366772,
'p': 0.6038107752956636,
'r': 0.8922330097087379},
{'a': 0.7935819601040763,
'f1': 0.8212318477716575,
'p': 0.7263064658990257,
'r': 0.9447004608294931},
{'a': 0.7500845451471085,
'f1': 0.8024592354985296,
'p': 0.7117117117117117,
'r': 0.9197303921568627},
{'a': 0.7272727272727273,
'f1': 0.7387698686938493,
'p': 0.619351100811124,
'r': 0.9152397260273972},
{'a': 0.8605957446808511,
'f1': 0.9013134112543679,
'p': 0.8558352402745996,
'r': 0.9518961567828964},
{'a': 0.7504501260352899,
'f1': 0.7099204688154039,
'p': 0.5812200137080192,
'r': 0.9118279569892473},
{'a': 0.7527058051820269,
'f1': 0.7320540156361051,
'p': 0.6311274509803921,
'r': 0.871404399323181},
{'a': 0.7738570113531759,
'f1': 0.7661059980958426,
'p': 0.6675884955752213,
'r': 0.8987341772151899},
{'a': 0.7750533049040512,
'f1': 0.7287917737789202,
'p': 0.6169749727965179,
'r': 0.8901098901098901},
{'a': 0.8182175107970161,
'f1': 0.84127528282482,
'p': 0.7649625935162094,
'r': 0.9345011424219345},
{'a': 0.7622868605817452,
'f1': 0.7208480565371025,
'p': 0.6169354838709677,
'r': 0.8668555240793201},
{'a': 0.6897179253867152,
'f1': 0.6960784313725491,
'p': 0.5606604450825556,
'r': 0.917743830787309},
{'a': 0.6222222222222222,
'f1': 0.3751178133836004,
'p': 0.24968632371392724,
'r': 0.7537878787878788},
{'a': 0.6855524079320113,
'f1': 0.6961678832116787,
'p': 0.5593841642228738,
'r': 0.9214975845410628},
{'a': 0.8013661202185792,
'f1': 0.851844304055431,
'p': 0.7766629505759941,
'r': 0.9431407942238267},
{'a': 0.9086802194256212,
'f1': 0.9246739419749801,
'p': 0.9234449760765551,
'r': 0.92590618336887},
{'a': 0.732839313572543,
'f1': 0.7519014849692142,
'p': 0.6306196840826246,
'r': 0.9309417040358744},
{'a': 0.6179577464788732,
'f1': 0.354806739345887,
'p': 0.2315653298835705,
'r': 0.7584745762711864}],
'www.latimes.com;2010': [{'a': 0.5850843444806155,
'f1': 0.49166062364031904,
'p': 0.3570300157977883,
'r': 0.789289871944121},
{'a': 0.49572649572649574,
'f1': 0.41124886604172967,
'p': 0.27287319422150885,
'r': 0.8343558282208589},
{'a': 0.6794190577399929,
'f1': 0.3728343728343728,
'p': 0.2642436149312377,
'r': 0.6329411764705882},
{'a': 0.7071513002364066,
'f1': 0.47704485488126647,
'p': 0.3772954924874791,
'r': 0.648493543758967},
{'a': 0.8660617059891107,
'f1': 0.8762990278243379,
'p': 0.8378205128205128,
'r': 0.9184820801124385},
{'a': 0.8609794628751974,
'f1': 0.8613081166272655,
'p': 0.813849590469099,
'r': 0.9146443514644351},
{'a': 0.5482108713466266,
'f1': 0.4580602883355177,
'p': 0.3210840606338999,
'r': 0.7988571428571428},
{'a': 0.7263668192835981,
'f1': 0.49653121902874137,
'p': 0.3691967575534267,
'r': 0.7579425113464447},
{'a': 0.8486257928118394,
'f1': 0.8380090497737557,
'p': 0.7860780984719864,
'r': 0.8972868217054264},
{'a': 0.8374751491053678,
'f1': 0.7872478854912167,
'p': 0.7092614302461899,
'r': 0.8845029239766082},
{'a': 0.5851926977687627,
'f1': 0.1452455590386625,
'p': 0.08128654970760234,
'r': 0.6813725490196079},
{'a': 0.8460222412318221,
'f1': 0.8331788693234478,
'p': 0.779705117085863,
'r': 0.8945273631840795},
{'a': 0.7058642922935217,
'f1': 0.5801928133216476,
'p': 0.4652143359100492,
'r': 0.770663562281723},
{'a': 0.574838388861263,
'f1': 0.5037724898432966,
'p': 0.36717428087986465,
'r': 0.8022181146025879},
{'a': 0.63409915356711,
'f1': 0.41013645224171547,
'p': 0.2743870631194575,
'r': 0.8117283950617284},
{'a': 0.5813497619714366,
'f1': 0.27532719340765877,
'p': 0.1658878504672897,
'r': 0.8091168091168092},
{'a': 0.7512280701754386,
'f1': 0.5599006828057107,
'p': 0.41566820276497696,
'r': 0.8574144486692015},
{'a': 0.5737658674188999,
'f1': 0.5066927848514529,
'p': 0.3653483992467043,
'r': 0.8264110756123536},
{'a': 0.44818136522172397,
'f1': 0.33860853986264555,
'p': 0.21283783783783783,
'r': 0.8277372262773722},
{'a': 0.5393258426966292,
'f1': 0.43914415994387934,
'p': 0.30023980815347723,
'r': 0.8172323759791122},
{'a': 0.8528493364558938,
'f1': 0.8547206165703276,
'p': 0.811265544989027,
'r': 0.9030944625407166},
{'a': 0.5451306413301663,
'f1': 0.44046749452154854,
'p': 0.3015,
'r': 0.8170731707317073},
{'a': 0.6951649055395454,
'f1': 0.5310344827586208,
'p': 0.41945525291828795,
'r': 0.723489932885906},
{'a': 0.8481414324569356,
'f1': 0.8239621650026274,
'p': 0.7574879227053141,
'r': 0.9032258064516129},
{'a': 0.7052851597491788,
'f1': 0.2595648912228057,
'p': 0.1610800744878957,
'r': 0.667953667953668}],
'www.latimes.com;2015': [{'a': 0.5938778389053463,
'f1': 0.1715107913669065,
'p': 0.09600515463917526,
'r': 0.8032345013477089},
{'a': 0.63568345323741,
'f1': 0.18637532133676094,
'p': 0.10681399631675875,
'r': 0.7304785894206549},
{'a': 0.5944452121044632,
'f1': 0.19522895530573073,
'p': 0.11069651741293532,
'r': 0.8259860788863109},
{'a': 0.311409056412851,
'f1': 0.35004775549188155,
'p': 0.21673565937315198,
'r': 0.9094292803970223},
{'a': 0.23751617076326004,
'f1': 0.21643180005317736,
'p': 0.12378345498783455,
'r': 0.8604651162790697},
{'a': 0.5991861648016277,
'f1': 0.13758599124452783,
'p': 0.07498295841854125,
'r': 0.8333333333333334},
{'a': 0.3306508875739645,
'f1': 0.3897280966767372,
'p': 0.24814509480626545,
'r': 0.907537688442211},
{'a': 0.603215251102575,
'f1': 0.1875910282551704,
'p': 0.10595590654820665,
'r': 0.817258883248731},
{'a': 0.5912418842381545,
'f1': 0.19832023841777296,
'p': 0.11244239631336406,
'r': 0.8394495412844036},
{'a': 0.6033519553072626,
'f1': 0.292358803986711,
'p': 0.1753487048107031,
'r': 0.8787446504992867},
{'a': 0.580749718151071,
'f1': 0.12164157071154416,
'p': 0.06592,
'r': 0.7862595419847328},
{'a': 0.5854936959909336,
'f1': 0.13534278959810875,
'p': 0.07360977177756349,
'r': 0.8388278388278388},
{'a': 0.23728315201411349,
'f1': 0.19089207735495947,
'p': 0.10778443113772455,
'r': 0.8337874659400545},
{'a': 0.5976490582070528,
'f1': 0.17484751670055182,
'p': 0.09798177083333333,
'r': 0.8113207547169812},
{'a': 0.601409666283084,
'f1': 0.15851806863042817,
'p': 0.08805668016194332,
'r': 0.7933130699088146},
{'a': 0.5995661605206074,
'f1': 0.1482620732082436,
'p': 0.08166723144696712,
'r': 0.8033333333333333},
{'a': 0.6116892373485389,
'f1': 0.18491921005385994,
'p': 0.10492359932088285,
'r': 0.7783375314861462},
{'a': 0.59974993053626,
'f1': 0.20786362386582347,
'p': 0.11954459203036052,
'r': 0.7957894736842105},
{'a': 0.5972012621758814,
'f1': 0.22818086225026288,
'p': 0.13183475091130012,
'r': 0.84765625},
{'a': 0.2567389875082183,
'f1': 0.19565990750622553,
'p': 0.11079774375503626,
'r': 0.8358662613981763},
{'a': 0.6091758708581139,
'f1': 0.22384701912260968,
'p': 0.1285529715762274,
'r': 0.8652173913043478},
{'a': 0.608569161597461,
'f1': 0.31417979610750696,
'p': 0.19093213179386087,
'r': 0.8862745098039215},
{'a': 0.27805978567399886,
'f1': 0.26857142857142857,
'p': 0.15921409214092141,
'r': 0.8576642335766423},
{'a': 0.2214304565848509,
'f1': 0.1873278236914601,
'p': 0.10559006211180125,
'r': 0.8292682926829268},
{'a': 0.23664980326025858,
'f1': 0.20631209818819407,
'p': 0.11719787516600266,
'r': 0.8609756097560975}],
'www.nymag.com;2000': [{'a': 0.9425414364640884,
'f1': 0.9440860215053763,
'p': 0.9251844046364595,
'r': 0.9637760702524698},
{'a': 0.9427288040426727,
'f1': 0.9430803571428572,
'p': 0.9224890829694323,
'r': 0.9646118721461188},
{'a': 0.9402366863905326,
'f1': 0.9463051568314725,
'p': 0.9368421052631579,
'r': 0.9559613319011815},
{'a': 0.9270248596631917,
'f1': 0.9103448275862068,
'p': 0.8733459357277883,
'r': 0.9506172839506173},
{'a': 0.9404255319148936,
'f1': 0.851851851851852,
'p': 0.8341968911917098,
'r': 0.8702702702702703},
{'a': 0.9549382716049383,
'f1': 0.9581181870338497,
'p': 0.9619815668202765,
'r': 0.9542857142857143},
{'a': 0.9580137262817925,
'f1': 0.9697146185206756,
'p': 0.9714119019836639,
'r': 0.9680232558139535},
{'a': 0.9394673123486683,
'f1': 0.9440089585666294,
'p': 0.9366666666666666,
'r': 0.9514672686230248},
{'a': 0.9344746162927982,
'f1': 0.9410515135422199,
'p': 0.9267782426778243,
'r': 0.9557713052858684},
{'a': 0.5105755041810133,
'f1': 0.4271732872769142,
'p': 0.28277439024390244,
'r': 0.8729411764705882},
{'a': 0.9386454183266932,
'f1': 0.9216683621566633,
'p': 0.9114688128772636,
'r': 0.9320987654320988},
{'a': 0.627173213135866,
'f1': 0.5021496130696474,
'p': 0.34803337306317045,
'r': 0.9012345679012346},
{'a': 0.9426644182124789,
'f1': 0.9506292352371732,
'p': 0.9370229007633588,
'r': 0.9646365422396856},
{'a': 0.9452054794520548,
'f1': 0.9559902200488998,
'p': 0.9630541871921182,
'r': 0.9490291262135923},
{'a': 0.9378813089295619,
'f1': 0.946360153256705,
'p': 0.9285714285714286,
'r': 0.96484375},
{'a': 0.9535490605427975,
'f1': 0.9616213885295387,
'p': 0.948936170212766,
'r': 0.9746503496503497},
{'a': 0.6271008403361344,
'f1': 0.5862470862470862,
'p': 0.4280851063829787,
'r': 0.9297597042513863},
{'a': 0.9449612403100776,
'f1': 0.9086229086229086,
'p': 0.9145077720207254,
'r': 0.9028132992327366},
{'a': 0.9414389291689905,
'f1': 0.9415041782729805,
'p': 0.9378468368479467,
'r': 0.9451901565995525},
{'a': 0.9498181818181818,
'f1': 0.9332042594385286,
'p': 0.9323017408123792,
'r': 0.9341085271317829},
{'a': 0.9301221166892809,
'f1': 0.9286209286209286,
'p': 0.8993288590604027,
'r': 0.9598853868194842},
{'a': 0.9354838709677419,
'f1': 0.932415519399249,
'p': 0.9085365853658537,
'r': 0.9575835475578406},
{'a': 0.9311145510835913,
'f1': 0.9187214611872146,
'p': 0.8902654867256637,
'r': 0.9490566037735849},
{'a': 0.9412225705329154,
'f1': 0.9006622516556291,
'p': 0.9139784946236559,
'r': 0.8877284595300261},
{'a': 0.9436519258202568,
'f1': 0.9399239543726237,
'p': 0.9507692307692308,
'r': 0.9293233082706767}],
'www.nymag.com;2005': [{'a': 0.7711069418386491,
'f1': 0.7621832358674464,
'p': 0.6729776247848537,
'r': 0.8786516853932584},
{'a': 0.7747963584091998,
'f1': 0.7693817468105986,
'p': 0.6782006920415224,
'r': 0.8888888888888888},
{'a': 0.7776712985146143,
'f1': 0.7600827300930713,
'p': 0.6693989071038251,
'r': 0.8791866028708134},
{'a': 0.7718120805369127,
'f1': 0.7603036876355748,
'p': 0.6594543744120414,
'r': 0.8975672215108835},
{'a': 0.772093023255814,
'f1': 0.7591480065537956,
'p': 0.6575212866603595,
'r': 0.8979328165374677},
{'a': 0.6690590111642744,
'f1': 0.5300113250283126,
'p': 0.3848684210526316,
'r': 0.850909090909091},
{'a': 0.6889952153110048,
'f1': 0.6470131885182312,
'p': 0.4970202622169249,
'r': 0.9266666666666666},
{'a': 0.7529296875,
'f1': 0.7548449612403101,
'p': 0.6629787234042553,
'r': 0.876265466816648},
{'a': 0.7890173410404624,
'f1': 0.7859237536656892,
'p': 0.7030430220356768,
'r': 0.8909574468085106},
{'a': 0.8201140487299119,
'f1': 0.8394261915779732,
'p': 0.7699490662139219,
'r': 0.9226856561546287},
{'a': 0.7540029112081513,
'f1': 0.7907552620718118,
'p': 0.6794326241134752,
'r': 0.945705824284304},
{'a': 0.7821091505949939,
'f1': 0.8270921523933572,
'p': 0.7345286292654714,
'r': 0.9463487332339792},
{'a': 0.7581395348837209,
'f1': 0.6584564860426929,
'p': 0.5976154992548435,
'r': 0.7330895795246801},
{'a': 0.6695778748180495,
'f1': 0.6828132277596646,
'p': 0.5461997019374069,
'r': 0.9105590062111801},
{'a': 0.697495183044316,
'f1': 0.7186379928315413,
'p': 0.5905743740795287,
'r': 0.9176201372997712},
{'a': 0.7282120395327942,
'f1': 0.7632093933463796,
'p': 0.6482712765957447,
'r': 0.9276879162702188},
{'a': 0.8176943699731903,
'f1': 0.8482142857142857,
'p': 0.7840440165061898,
'r': 0.9238249594813615},
{'a': 0.7697462900909526,
'f1': 0.7593796898449224,
'p': 0.6704946996466431,
'r': 0.8754325259515571},
{'a': 0.7224770642201835,
'f1': 0.7547628698824482,
'p': 0.6363636363636364,
'r': 0.9272908366533864},
{'a': 0.7748896517900932,
'f1': 0.7610619469026549,
'p': 0.6688014638609332,
'r': 0.8828502415458938},
{'a': 0.7994902293967715,
'f1': 0.814026792750197,
'p': 0.7405017921146954,
'r': 0.9037620297462817},
{'a': 0.7799607072691552,
'f1': 0.7812499999999999,
'p': 0.6884681583476764,
'r': 0.9029345372460497},
{'a': 0.6740623349181194,
'f1': 0.6716338477913784,
'p': 0.531592249368155,
'r': 0.911849710982659},
{'a': 0.7434108527131783,
'f1': 0.602641056422569,
'p': 0.47992351816443596,
'r': 0.8096774193548387},
{'a': 0.6736401673640168,
'f1': 0.6729559748427673,
'p': 0.5358931552587646,
'r': 0.9042253521126761},
{'a': 0.768056968463886,
'f1': 0.7894736842105263,
'p': 0.6979591836734694,
'r': 0.9086078639744952}],
'www.nymag.com;2010': [{'a': 0.48481943112815595,
'f1': 0.0627906976744186,
'p': 0.037241379310344824,
'r': 0.2},
{'a': 0.44631901840490795,
'f1': 0.33419402434526,
'p': 0.20798898071625344,
'r': 0.849906191369606},
{'a': 0.3217094017094017,
'f1': 0.036893203883495145,
'p': 0.01954732510288066,
'r': 0.3275862068965517},
{'a': 0.311042524005487,
'f1': 0.02899951667472209,
'p': 0.015511892450879007,
'r': 0.2222222222222222},
{'a': 0.4183240952070427,
'f1': 0.22904062229904926,
'p': 0.1347914547304171,
'r': 0.7614942528735632},
{'a': 0.3187355943365163,
'f1': 0.1510053344275749,
'p': 0.08329560887279312,
'r': 0.8070175438596491},
{'a': 0.41282778171509565,
'f1': 0.043854587420657815,
'p': 0.023944549464398234,
'r': 0.2602739726027397},
{'a': 0.43229657555765,
'f1': 0.2786427145708583,
'p': 0.16941747572815535,
'r': 0.7842696629213484},
{'a': 0.3127237227465018,
'f1': 0.14146341463414633,
'p': 0.0777479892761394,
'r': 0.7837837837837838},
{'a': 0.30776762402088775,
'f1': 0.1360488798370672,
'p': 0.07435440783615316,
'r': 0.7990430622009569},
{'a': 0.5344157329064715,
'f1': 0.5479573712255773,
'p': 0.3932441045251753,
'r': 0.9033674963396779},
{'a': 0.3160771704180064,
'f1': 0.15829046299960425,
'p': 0.08798944126704795,
'r': 0.7874015748031497},
{'a': 0.3274732850741124,
'f1': 0.03938946331856228,
'p': 0.021197668256491786,
'r': 0.2777777777777778},
{'a': 0.309208290859667,
'f1': 0.031443544545021435,
'p': 0.016516516516516516,
'r': 0.32673267326732675},
{'a': 0.3213815789473684,
'f1': 0.16104107360715736,
'p': 0.0894713059195662,
'r': 0.8048780487804879},
{'a': 0.32691658223573117,
'f1': 0.15658061785865424,
'p': 0.08714083843617522,
'r': 0.7708333333333334},
{'a': 0.4134419551934827,
'f1': 0.187206020696143,
'p': 0.1067024128686327,
'r': 0.7624521072796935},
{'a': 0.4195666447800394,
'f1': 0.23130434782608694,
'p': 0.13516260162601626,
'r': 0.8012048192771084},
{'a': 0.5790219702338767,
'f1': 0.5123152709359605,
'p': 0.3659824046920821,
'r': 0.853625170998632},
{'a': 0.48916909149692855,
'f1': 0.05952380952380952,
'p': 0.03333333333333333,
'r': 0.2777777777777778},
{'a': 0.3778471138845554,
'f1': 0.3018207282913165,
'p': 0.18340425531914895,
'r': 0.8517786561264822},
{'a': 0.3234536082474227,
'f1': 0.16930379746835442,
'p': 0.09460654288240496,
'r': 0.8045112781954887},
{'a': 0.4106593782029382,
'f1': 0.18207681365576103,
'p': 0.10339256865912763,
'r': 0.7619047619047619},
{'a': 0.40123034859876966,
'f1': 0.04782608695652174,
'p': 0.026112759643916916,
'r': 0.2838709677419355},
{'a': 0.5376782077393075,
'f1': 0.5942806076854334,
'p': 0.44127405441274054,
'r': 0.9097127222982216}],
'www.nymag.com;2015': [{'a': 0.12131556489201077,
'f1': 0.0027319011548491245,
'p': 0.0013710582076529975,
'r': 0.36666666666666664},
{'a': 0.12269175361243288,
'f1': 0.003965008797363269,
'p': 0.001994266483858906,
'r': 0.33613445378151263},
{'a': 0.12294602844710008,
'f1': 0.02455937590291823,
'p': 0.01246913278403951,
'r': 0.8082408874801902},
{'a': 0.16712910070181167,
'f1': 0.05854498493327594,
'p': 0.030300136859861868,
'r': 0.8631006346328196},
{'a': 0.25470154326426825,
'f1': 0.25441998690374257,
'p': 0.14750459948018574,
'r': 0.9245835621453414},
{'a': 0.17664334917498797,
'f1': 0.0838928168260947,
'p': 0.04405891163255117,
'r': 0.8748451053283767},
{'a': 0.12178293724674187,
'f1': 0.0027855845996965704,
'p': 0.0013984966161376521,
'r': 0.34146341463414637},
{'a': 0.12372770769899956,
'f1': 0.005528816487720596,
'p': 0.0027873870734924466,
'r': 0.33532934131736525},
{'a': 0.12206695969734742,
'f1': 0.003672912271994044,
'p': 0.0018477826608070316,
'r': 0.29959514170040485},
{'a': 0.12218016322779686,
'f1': 0.0030812812166090995,
'p': 0.0015481035731229244,
'r': 0.31958762886597936},
{'a': 0.12040199256052729,
'f1': 0.01787622656174099,
'p': 0.009036884239477433,
'r': 0.8177777777777778},
{'a': 0.15947986577181208,
'f1': 0.004504355314145663,
'p': 0.002267724938304542,
'r': 0.3285024154589372},
{'a': 0.12283227537464826,
'f1': 0.004061818902318208,
'p': 0.002044123145955378,
'r': 0.31417624521072796},
{'a': 0.131848751352114,
'f1': 0.02022239324858682,
'p': 0.010238357563217155,
'r': 0.8141025641025641},
{'a': 0.294921875,
'f1': 0.34880605811648296,
'p': 0.21353597200962168,
'r': 0.9516616314199395},
{'a': 0.12296532587559265,
'f1': 0.004612294492523619,
'p': 0.0023225033089428865,
'r': 0.3274647887323944},
{'a': 0.12218213621952553,
'f1': 0.003573999156139088,
'p': 0.001795914294978923,
'r': 0.36},
{'a': 0.627518315018315,
'f1': 0.7415409054805402,
'p': 0.6018566271273853,
'r': 0.965659908978072},
{'a': 0.12394710426395496,
'f1': 0.005351568306823249,
'p': 0.002696965913347484,
'r': 0.34069400630914826},
{'a': 0.12461045612046985,
'f1': 0.0060868489422244215,
'p': 0.003071928071928072,
'r': 0.328},
{'a': 0.16345886410413307,
'f1': 0.009158293526601878,
'p': 0.004635187408296652,
'r': 0.3787465940054496},
{'a': 0.1570371188687583,
'f1': 0.030863021527910078,
'p': 0.01572306696861857,
'r': 0.8321917808219178},
{'a': 0.12216138391396349,
'f1': 0.003220133263976617,
'p': 0.001618002140741294,
'r': 0.3282828282828283},
{'a': 0.12024096385542168,
'f1': 0.0014917580368464236,
'p': 0.0007477380922708806,
'r': 0.3},
{'a': 0.12138728323699421,
'f1': 0.0031796502384737677,
'p': 0.001597444089456869,
'r': 0.3333333333333333},
{'a': 0.1540937213883417,
'f1': 0.02350831118086026,
'p': 0.011924876527164023,
'r': 0.8210290827740492}]}]
[About 4036 more lines. Double-click to unfold]
>>> for basepath,trimmed in trimmed_results:
... # Each entry unpacks to (basepath, trimmed); trimmed is a dict keyed by
... # 'domain;year' strings whose values are lists of metric dicts
... # with 'a', 'f1', 'p' and 'r' entries.
... # NOTE(review): the log appears to have dropped the loop-body indentation;
... # presumably the lines below are nested under the two for statements.
... print(basepath)
... for key,val in trimmed.items():
... # Splits the 'domain;year' key into [domain, year] for display.
... print(key.split(";"))
... # Arithmetic mean of the 'p' values in val. Under Python 2 this print
... # statement receives a parenthesised pair, so the output is shown as a
... # tuple. NOTE(review): an empty val would raise ZeroDivisionError.
... print("precision avg", sum([d['p'] for d in val])/len(val))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
['news.yahoo.com', '2000']
('precision avg', 0.8683999024519955)
['www.cnn.com', '2005']
('precision avg', 0.6211738688863229)
['news.yahoo.com', '2005']
('precision avg', 0.6484059381381134)
['www.cnn.com', '2000']
('precision avg', 0.7057985767079694)
['www.esquire.com', '2010']
('precision avg', 0.350690725741593)
['www.nymag.com', '2015']
('precision avg', 0.04347666120043913)
['www.latimes.com', '2005']
('precision avg', 0.6286822306460558)
['www.latimes.com', '2000']
('precision avg', 0.8572444640811792)
['www.esquire.com', '2000']
('precision avg', 0.8838432933187248)
['www.foxnews.com', '2010']
('precision avg', 0.4369663029782896)
['www.foxnews.com', '2015']
('precision avg', 0.7543074627097117)
['www.forbes.com', '2000']
('precision avg', 0.672786168820609)
['news.bbc.co.uk', '2015']
('precision avg', 0.13308241469189427)
['www.forbes.com', '2005']
('precision avg', 0.7088740876453605)
['news.bbc.co.uk', '2010']
('precision avg', 0.41742126841582045)
['entertainment.msn.com', '2000']
('precision avg', 0.8464097205988462)
['entertainment.msn.com', '2005']
('precision avg', 0.4417019947232166)
['thenation.com', '2015']
('precision avg', 0.5021943230481295)
['thenation.com', '2010']
('precision avg', 0.4915998914598782)
['news.yahoo.com', '2010']
('precision avg', 0.578675352037152)
['news.yahoo.com', '2015']
('precision avg', 0.018386000373501205)
['www.cnn.com', '2010']
('precision avg', 0.4320541005367761)
['www.cnn.com', '2015']
('precision avg', 0.3029125013356543)
['www.latimes.com', '2015']
('precision avg', 0.12218070191163449)
['www.latimes.com', '2010']
('precision avg', 0.4394908403162711)
['www.foxnews.com', '2000']
('precision avg', 0.8753434627782866)
['www.foxnews.com', '2005']
('precision avg', 0.3387610582651177)
['www.nymag.com', '2000']
('precision avg', 0.855375126938793)
['news.bbc.co.uk', '2005']
('precision avg', 0.6766215452483197)
['www.forbes.com', '2015']
('precision avg', 0.3484809556323725)
['news.bbc.co.uk', '2000']
('precision avg', 0.8919253176470745)
['www.nymag.com', '2010']
('precision avg', 0.12117487487387163)
['www.forbes.com', '2010']
('precision avg', 0.660445014823463)
['thenation.com', '2005']
('precision avg', 0.7906462969568482)
['entertainment.msn.com', '2010']
('precision avg', 0.406078738764565)
['entertainment.msn.com', '2015']
('precision avg', 0.07401168704789596)
['www.esquire.com', '2015']
('precision avg', 0.06668416878257147)
['thenation.com', '2000']
('precision avg', 0.8797908423638537)
['www.nymag.com', '2005']
('precision avg', 0.63792601378758)
['www.esquire.com', '2005']
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
['news.yahoo.com', '2000']
('precision avg', 0.6850494294232871)
['www.cnn.com', '2005']
('precision avg', 0.6040764658910175)
['news.yahoo.com', '2005']
('precision avg', 0.553762919588722)
['www.cnn.com', '2000']
('precision avg', 0.6767895607401481)
['www.esquire.com', '2010']
('precision avg', 0.33316897330572265)
['www.nymag.com', '2015']
('precision avg', 0.045097010038526314)
['www.latimes.com', '2005']
('precision avg', 0.6083888210659326)
['www.latimes.com', '2000']
('precision avg', 0.9250015482359525)
['www.esquire.com', '2000']
('precision avg', 0.8413135303544912)
['www.foxnews.com', '2010']
('precision avg', 0.30196347934410317)
['www.foxnews.com', '2015']
('precision avg', 0.5811714938921815)
['www.forbes.com', '2000']
('precision avg', 0.6534683899153207)
['news.bbc.co.uk', '2015']
('precision avg', 0.11640002710415726)
['www.forbes.com', '2005']
('precision avg', 0.3535572901091079)
['news.bbc.co.uk', '2010']
('precision avg', 0.3041230830149431)
['entertainment.msn.com', '2000']
('precision avg', 0.7786809001887803)
['entertainment.msn.com', '2005']
('precision avg', 0.4186477840737833)
['thenation.com', '2015']
('precision avg', 0.46860182851819593)
['thenation.com', '2010']
('precision avg', 0.4313543594309007)
['news.yahoo.com', '2010']
('precision avg', 0.31975003566238014)
['news.yahoo.com', '2015']
('precision avg', 0.018336170825126605)
['www.cnn.com', '2010']
('precision avg', 0.33209885398619127)
['www.cnn.com', '2015']
('precision avg', 0.29940846505010726)
['www.latimes.com', '2015']
('precision avg', 0.12272875317104835)
['www.latimes.com', '2010']
('precision avg', 0.4017566608588345)
['www.foxnews.com', '2000']
('precision avg', 0.7997548741274138)
['www.foxnews.com', '2005']
('precision avg', 0.5437722450092386)
['www.nymag.com', '2000']
('precision avg', 0.8357303273376807)
['news.bbc.co.uk', '2005']
('precision avg', 0.5177519581452089)
['www.forbes.com', '2015']
('precision avg', 0.2791851517078164)
['news.bbc.co.uk', '2000']
('precision avg', 0.5729001117183127)
['www.nymag.com', '2010']
('precision avg', 0.1207055620925438)
['www.forbes.com', '2010']
('precision avg', 0.5461973136131762)
['thenation.com', '2005']
('precision avg', 0.7305810594010783)
['entertainment.msn.com', '2010']
('precision avg', 0.34039318503958943)
['entertainment.msn.com', '2015']
('precision avg', 0.07007420541341315)
['www.esquire.com', '2015']
('precision avg', 0.06624818281736693)
['thenation.com', '2000']
('precision avg', 0.8673455478551348)
['www.nymag.com', '2005']
('precision avg', 0.563772151455564)
['www.esquire.com', '2005']
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
['news.yahoo.com', '2000']
('precision avg', 0.7085868798180318)
['www.cnn.com', '2005']
('precision avg', 0.5452230974470502)
['news.yahoo.com', '2005']
('precision avg', 0.5720537517102889)
['www.cnn.com', '2000']
('precision avg', 0.5996675436639709)
['www.esquire.com', '2010']
('precision avg', 0.35502210934852385)
['www.nymag.com', '2015']
('precision avg', 0.044054500032989705)
['www.latimes.com', '2005']
('precision avg', 0.5986065482125335)
['www.latimes.com', '2000']
('precision avg', 0.8111021925809357)
['www.esquire.com', '2000']
('precision avg', 0.8605989019941799)
['www.foxnews.com', '2010']
('precision avg', 0.33226391278577666)
['www.foxnews.com', '2015']
('precision avg', 0.5507998052113364)
['www.forbes.com', '2000']
('precision avg', 0.6396386293000841)
['news.bbc.co.uk', '2015']
('precision avg', 0.11919826889537567)
['www.forbes.com', '2005']
('precision avg', 0.46550867158986875)
['news.bbc.co.uk', '2010']
('precision avg', 0.3116620725542801)
['entertainment.msn.com', '2000']
('precision avg', 0.6700112446095992)
['entertainment.msn.com', '2005']
('precision avg', 0.41877132883633295)
['thenation.com', '2015']
('precision avg', 0.45609212979185026)
['thenation.com', '2010']
('precision avg', 0.45163668180184763)
['news.yahoo.com', '2010']
('precision avg', 0.3842242600405546)
['news.yahoo.com', '2015']
('precision avg', 0.018210151381855617)
['www.cnn.com', '2010']
('precision avg', 0.3597736757896344)
['www.cnn.com', '2015']
('precision avg', 0.2894186892399318)
['www.latimes.com', '2015']
('precision avg', 0.11616947582549013)
['www.latimes.com', '2010']
('precision avg', 0.42023599354375335)
['www.foxnews.com', '2000']
('precision avg', 0.8038458494347899)
['www.foxnews.com', '2005']
('precision avg', 0.3258139976914447)
['www.nymag.com', '2000']
('precision avg', 0.8370761752694753)
['news.bbc.co.uk', '2005']
('precision avg', 0.5984474642761501)
['www.forbes.com', '2015']
('precision avg', 0.2957366925768594)
['news.bbc.co.uk', '2000']
('precision avg', 0.6203488498249787)
['www.nymag.com', '2010']
('precision avg', 0.12616883801360262)
['www.forbes.com', '2010']
('precision avg', 0.6366358379063284)
['thenation.com', '2005']
('precision avg', 0.7878965878803256)
['entertainment.msn.com', '2010']
('precision avg', 0.31730289839466586)
['entertainment.msn.com', '2015']
('precision avg', 0.07134534027116624)
['www.esquire.com', '2015']
('precision avg', 0.06563692613337585)
['thenation.com', '2000']
('precision avg', 0.8883697815371147)
['www.nymag.com', '2005']
('precision avg', 0.5591489354125457)
['www.esquire.com', '2005']
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
['news.yahoo.com', '2000']
('precision avg', 0.845171687583807)
['www.cnn.com', '2005']
('precision avg', 0.6138419519432511)
['news.yahoo.com', '2005']
('precision avg', 0.6604807297526994)
['www.cnn.com', '2000']
('precision avg', 0.814601457327025)
['www.esquire.com', '2010']
('precision avg', 0.4108209241176574)
['www.nymag.com', '2015']
('precision avg', 0.042433386182753455)
['www.latimes.com', '2005']
('precision avg', 0.62804991992724)
['www.latimes.com', '2000']
('precision avg', 0.8608073651801277)
['www.esquire.com', '2000']
('precision avg', 0.8958452423944537)
['www.foxnews.com', '2010']
('precision avg', 0.4429766402972281)
['www.foxnews.com', '2015']
('precision avg', 0.7578331189482941)
['www.forbes.com', '2000']
('precision avg', 0.6696646303443038)
['news.bbc.co.uk', '2015']
('precision avg', 0.13348385856834152)
['www.forbes.com', '2005']
('precision avg', 0.6725911369907592)
['news.bbc.co.uk', '2010']
('precision avg', 0.41579101137917696)
['entertainment.msn.com', '2000']
('precision avg', 0.8307937669693192)
['entertainment.msn.com', '2005']
('precision avg', 0.4385329315775029)
['thenation.com', '2015']
('precision avg', 0.5045095303547008)
['thenation.com', '2010']
('precision avg', 0.5713283942566657)
['news.yahoo.com', '2010']
('precision avg', 0.5688487055711114)
['news.yahoo.com', '2015']
('precision avg', 0.018287140557954475)
['www.cnn.com', '2010']
('precision avg', 0.4253522345143547)
['www.cnn.com', '2015']
('precision avg', 0.31429130410851036)
['www.latimes.com', '2015']
('precision avg', 0.13362793010722546)
['www.latimes.com', '2010']
('precision avg', 0.4775014374694188)
['www.foxnews.com', '2000']
('precision avg', 0.8939153820029523)
['www.foxnews.com', '2005']
('precision avg', 0.3291739634988008)
['www.nymag.com', '2000']
('precision avg', 0.8780918332144314)
['news.bbc.co.uk', '2005']
('precision avg', 0.8696766652605129)
['www.forbes.com', '2015']
('precision avg', 0.34104189997176576)
['news.bbc.co.uk', '2000']
('precision avg', 0.9269321571721406)
['www.nymag.com', '2010']
('precision avg', 0.19291701995638091)
['www.forbes.com', '2010']
('precision avg', 0.6700458372321113)
['thenation.com', '2005']
('precision avg', 0.9004401876670914)
['entertainment.msn.com', '2010']
('precision avg', 0.4615173559036399)
['entertainment.msn.com', '2015']
('precision avg', 0.07619402011443083)
['www.esquire.com', '2015']
('precision avg', 0.06688037449997622)
['thenation.com', '2000']
('precision avg', 0.9264909687190436)
['www.nymag.com', '2005']
('precision avg', 0.647029606678659)
['www.esquire.com', '2005']
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
['news.yahoo.com', '2000']
('precision avg', 0.42620741281304775)
['www.cnn.com', '2005']
('precision avg', 0.13467802471805546)
['news.yahoo.com', '2005']
('precision avg', 0.19753138142997795)
['www.cnn.com', '2000']
('precision avg', 0.36240339186286513)
['www.esquire.com', '2010']
('precision avg', 0.07315564039653401)
['www.nymag.com', '2015']
('precision avg', 0.007736638561868903)
['www.latimes.com', '2005']
('precision avg', 0.5836816702543528)
['www.latimes.com', '2000']
('precision avg', 0.3108694782857746)
['www.esquire.com', '2000']
('precision avg', 0.36353215206468886)
['www.foxnews.com', '2010']
('precision avg', 0.04998551810329787)
['www.foxnews.com', '2015']
('precision avg', 0.375579362581543)
['www.forbes.com', '2000']
('precision avg', 0.19561991680653532)
['news.bbc.co.uk', '2015']
('precision avg', 0.03774622737251699)
['www.forbes.com', '2005']
('precision avg', 0.3108646536464451)
['news.bbc.co.uk', '2010']
('precision avg', 0.060235937357474324)
['entertainment.msn.com', '2000']
('precision avg', 0.4101324917837275)
['entertainment.msn.com', '2005']
('precision avg', 0.13401296466303134)
['thenation.com', '2015']
('precision avg', 0.05919721060422903)
['thenation.com', '2010']
('precision avg', 0.20247773008213288)
['news.yahoo.com', '2010']
('precision avg', 0.17931260635945423)
['news.yahoo.com', '2015']
('precision avg', 0.013623785917849206)
['www.cnn.com', '2010']
('precision avg', 0.14134634671225613)
['www.cnn.com', '2015']
('precision avg', 0.06418395143820504)
['www.latimes.com', '2015']
('precision avg', 0.03862377662399559)
['www.latimes.com', '2010']
('precision avg', 0.12564869282111016)
['www.foxnews.com', '2000']
('precision avg', 0.6306054822791173)
['www.foxnews.com', '2005']
('precision avg', 0.08660519833502513)
['www.nymag.com', '2000']
('precision avg', 0.6510800190870829)
['news.bbc.co.uk', '2005']
('precision avg', 0.42373560283340894)
['www.forbes.com', '2015']
('precision avg', 0.09245746028554677)
['news.bbc.co.uk', '2000']
('precision avg', 0.48511903626399655)
['www.nymag.com', '2010']
('precision avg', 0.10996307421215494)
['www.forbes.com', '2010']
('precision avg', 0.26944679283076217)
['thenation.com', '2005']
('precision avg', 0.36475352093750507)
['entertainment.msn.com', '2010']
('precision avg', 0.1298783825779457)
['entertainment.msn.com', '2015']
('precision avg', 0.01665606784896567)
['www.esquire.com', '2015']
('precision avg', 0.03007502159405593)
['thenation.com', '2000']
('precision avg', 0.5940573521660849)
['www.nymag.com', '2005']
('precision avg', 0.29333271544410583)
['www.esquire.com', '2005']
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
['news.yahoo.com', '2000']
('precision avg', 0.20901849585870255)
['www.cnn.com', '2005']
('precision avg', 0.059986740214574584)
['news.yahoo.com', '2005']
('precision avg', 0.10767432608632829)
['www.cnn.com', '2000']
('precision avg', 0.257515252955112)
['www.esquire.com', '2010']
('precision avg', 0.04548066527479219)
['www.nymag.com', '2015']
('precision avg', 0.026359227184905656)
['www.latimes.com', '2005']
('precision avg', 0.05359185225411894)
['www.latimes.com', '2000']
('precision avg', 0.259452776657885)
['www.esquire.com', '2000']
('precision avg', 0.08509745093616063)
['www.foxnews.com', '2010']
('precision avg', 0.05581275411375033)
['www.foxnews.com', '2015']
('precision avg', 0.4335891319208261)
['www.forbes.com', '2000']
('precision avg', 0.06402753069155678)
['news.bbc.co.uk', '2015']
('precision avg', 0.0343743719528452)
['www.forbes.com', '2005']
('precision avg', 0.05855408531900379)
['news.bbc.co.uk', '2010']
('precision avg', 0.030801142370060597)
['entertainment.msn.com', '2000']
('precision avg', 0.1207254953634531)
['entertainment.msn.com', '2005']
('precision avg', 0.022684054643112104)
['thenation.com', '2015']
('precision avg', 0.04517924477576478)
['thenation.com', '2010']
('precision avg', 0.17286342176717917)
['news.yahoo.com', '2010']
('precision avg', 0.13330163214667995)
['news.yahoo.com', '2015']
('precision avg', 0.01360822373383575)
['www.cnn.com', '2010']
('precision avg', 0.10473888905032128)
['www.cnn.com', '2015']
('precision avg', 0.06889188795616198)
['www.latimes.com', '2015']
('precision avg', 0.042838971662592956)
['www.latimes.com', '2010']
('precision avg', 0.09076425485857424)
['www.foxnews.com', '2000']
('precision avg', 0.19892154478650526)
['www.foxnews.com', '2005']
('precision avg', 0.06539045075135168)
['www.nymag.com', '2000']
('precision avg', 0.7097155883500723)
['news.bbc.co.uk', '2005']
('precision avg', 0.17945710895699843)
['www.forbes.com', '2015']
('precision avg', 0.08303484471116346)
['news.bbc.co.uk', '2000']
('precision avg', 0.20720090593774804)
['www.nymag.com', '2010']
('precision avg', 0.044021800471251676)
['www.forbes.com', '2010']
('precision avg', 0.12450890050980233)
['thenation.com', '2005']
('precision avg', 0.3682191437488494)
['entertainment.msn.com', '2010']
('precision avg', 0.048415999768495664)
['entertainment.msn.com', '2015']
('precision avg', 0.019098335185455066)
['www.esquire.com', '2015']
('precision avg', 0.029511317676966412)
['thenation.com', '2000']
('precision avg', 0.13596091613435268)
['www.nymag.com', '2005']
('precision avg', 0.06628139954248631)
['www.esquire.com', '2005']
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
['news.yahoo.com', '2000']
('precision avg', 0.8087223234480646)
['www.cnn.com', '2005']
('precision avg', 0.6246431645192004)
['news.yahoo.com', '2005']
('precision avg', 0.6564277365163304)
['www.cnn.com', '2000']
('precision avg', 0.8548475040578897)
['www.esquire.com', '2010']
('precision avg', 0.3866449923138486)
['www.nymag.com', '2015']
('precision avg', 0.04407018165599923)
['www.latimes.com', '2005']
('precision avg', 0.6305525928335406)
['www.latimes.com', '2000']
('precision avg', 0.8586431262759081)
['www.esquire.com', '2000']
('precision avg', 0.9094951163901436)
['www.foxnews.com', '2010']
('precision avg', 0.43042619558685963)
['www.foxnews.com', '2015']
('precision avg', 0.7143877357447221)
['www.forbes.com', '2000']
('precision avg', 0.6714638467737778)
['news.bbc.co.uk', '2015']
('precision avg', 0.13167702125754785)
['www.forbes.com', '2005']
('precision avg', 0.6347119061510296)
['news.bbc.co.uk', '2010']
('precision avg', 0.4218849025093671)
['entertainment.msn.com', '2000']
('precision avg', 0.8394696101599314)
['entertainment.msn.com', '2005']
('precision avg', 0.41909133109112984)
['thenation.com', '2015']
('precision avg', 0.49137907199335934)
['thenation.com', '2010']
('precision avg', 0.5434962294760681)
['news.yahoo.com', '2010']
('precision avg', 0.5255237109769181)
['news.yahoo.com', '2015']
('precision avg', 0.018207304858120263)
['www.cnn.com', '2010']
('precision avg', 0.41096334652475897)
['www.cnn.com', '2015']
('precision avg', 0.2995482008650643)
['www.latimes.com', '2015']
('precision avg', 0.12943625842051695)
['www.latimes.com', '2010']
('precision avg', 0.46851571662688785)
['www.foxnews.com', '2000']
('precision avg', 0.8730005208941793)
['www.foxnews.com', '2005']
('precision avg', 0.32624183171026927)
['www.nymag.com', '2000']
('precision avg', 0.87037309891489)
['news.bbc.co.uk', '2005']
('precision avg', 0.8903295054227871)
['www.forbes.com', '2015']
('precision avg', 0.3247401991305493)
['news.bbc.co.uk', '2000']
('precision avg', 0.9404262665672861)
['www.nymag.com', '2010']
('precision avg', 0.18612609604198724)
['www.forbes.com', '2010']
('precision avg', 0.6832190050181055)
['thenation.com', '2005']
('precision avg', 0.8503041167751857)
['entertainment.msn.com', '2010']
('precision avg', 0.3885914367069556)
['entertainment.msn.com', '2015']
('precision avg', 0.07223171179425797)
['www.esquire.com', '2015']
('precision avg', 0.06655548974343942)
['thenation.com', '2000']
('precision avg', 0.9018062702104928)
['www.nymag.com', '2005']
('precision avg', 0.6187842974937978)
['www.esquire.com', '2005']
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
['news.yahoo.com', '2000']
('precision avg', 0.624509586529224)
['www.cnn.com', '2005']
('precision avg', 0.5211798048500964)
['news.yahoo.com', '2005']
('precision avg', 0.47413698373950586)
['www.cnn.com', '2000']
('precision avg', 0.7639870392099527)
['www.esquire.com', '2010']
('precision avg', 0.30168348382925414)
['www.nymag.com', '2015']
('precision avg', 0.02016864363656134)
['www.latimes.com', '2005']
('precision avg', 0.5780249782557191)
['www.latimes.com', '2000']
('precision avg', 0.8533664269391111)
['www.esquire.com', '2000']
('precision avg', 0.7898786198862732)
['www.foxnews.com', '2010']
('precision avg', 0.2962800934568883)
['www.foxnews.com', '2015']
('precision avg', 0.6090391904597992)
['www.forbes.com', '2000']
('precision avg', 0.5711755450519884)
['news.bbc.co.uk', '2015']
('precision avg', 0.12364045968228651)
['www.forbes.com', '2005']
('precision avg', 0.3897890357149691)
['news.bbc.co.uk', '2010']
('precision avg', 0.4059495413065444)
['entertainment.msn.com', '2000']
('precision avg', 0.5947838565979695)
['entertainment.msn.com', '2005']
('precision avg', 0.3002877518789036)
['thenation.com', '2015']
('precision avg', 0.4305324047938475)
['thenation.com', '2010']
('precision avg', 0.43766238053965867)
['news.yahoo.com', '2010']
('precision avg', 0.44113596091377016)
['news.yahoo.com', '2015']
('precision avg', 0.01726963014185236)
['www.cnn.com', '2010']
('precision avg', 0.40366920898565617)
['www.cnn.com', '2015']
('precision avg', 0.2791625929367795)
['www.latimes.com', '2015']
('precision avg', 0.09226749054973236)
['www.latimes.com', '2010']
('precision avg', 0.3291085018457033)
['www.foxnews.com', '2000']
('precision avg', 0.7523577774183531)
['www.foxnews.com', '2005']
('precision avg', 0.2933510531370516)
['www.nymag.com', '2000']
('precision avg', 0.7789060527515866)
['news.bbc.co.uk', '2005']
('precision avg', 0.4781585844754011)
['www.forbes.com', '2015']
('precision avg', 0.27262554887522134)
['news.bbc.co.uk', '2000']
('precision avg', 0.8106990701712056)
['www.nymag.com', '2010']
('precision avg', 0.17948716199317488)
['www.forbes.com', '2010']
('precision avg', 0.46850561527616924)
['thenation.com', '2005']
('precision avg', 0.7429835013444328)
['entertainment.msn.com', '2010']
('precision avg', 0.26444680402070014)
['entertainment.msn.com', '2015']
('precision avg', 0.06978700649645515)
['www.esquire.com', '2015']
('precision avg', 0.06346258311336817)
['thenation.com', '2000']
('precision avg', 0.88178763991206)
['www.nymag.com', '2005']
('precision avg', 0.5501415443668539)
['www.esquire.com', '2005']
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
['news.yahoo.com', '2000']
('precision avg', 0.582312988541893)
['www.cnn.com', '2005']
('precision avg', 0.5193456459563168)
['news.yahoo.com', '2005']
('precision avg', 0.5907695245619571)
['www.cnn.com', '2000']
('precision avg', 0.6515399864573219)
['www.esquire.com', '2010']
('precision avg', 0.37115173971617904)
['www.nymag.com', '2015']
('precision avg', 0.043716349444529276)
['www.latimes.com', '2005']
('precision avg', 0.595289193234168)
['www.latimes.com', '2000']
('precision avg', 0.7738884403357201)
['www.esquire.com', '2000']
('precision avg', 0.7644592804937886)
['www.foxnews.com', '2010']
('precision avg', 0.3128787204252841)
['www.foxnews.com', '2015']
('precision avg', 0.5532927097447694)
['www.forbes.com', '2000']
('precision avg', 0.5907915319816335)
['news.bbc.co.uk', '2015']
('precision avg', 0.12744554000908173)
['www.forbes.com', '2005']
('precision avg', 0.440919472040343)
['news.bbc.co.uk', '2010']
('precision avg', 0.35022956666455196)
['entertainment.msn.com', '2000']
('precision avg', 0.7026935512166758)
['entertainment.msn.com', '2005']
('precision avg', 0.408552851894306)
['thenation.com', '2015']
('precision avg', 0.49478343796012586)
['thenation.com', '2010']
('precision avg', 0.46639164402721606)
['news.yahoo.com', '2010']
('precision avg', 0.42375700219940554)
['news.yahoo.com', '2015']
('precision avg', 0.018395332008463647)
['www.cnn.com', '2010']
('precision avg', 0.34478378415001126)
['www.cnn.com', '2015']
('precision avg', 0.29387452934098823)
['www.latimes.com', '2015']
('precision avg', 0.12303070023234008)
['www.latimes.com', '2010']
('precision avg', 0.4339480725003696)
['www.foxnews.com', '2000']
('precision avg', 0.784355741068858)
['www.foxnews.com', '2005']
('precision avg', 0.3340192053349481)
['www.nymag.com', '2000']
('precision avg', 0.7817656558124935)
['news.bbc.co.uk', '2005']
('precision avg', 0.6212194798442074)
['www.forbes.com', '2015']
('precision avg', 0.3002309261235297)
['news.bbc.co.uk', '2000']
('precision avg', 0.6426368875256027)
['www.nymag.com', '2010']
('precision avg', 0.18114137356170518)
['www.forbes.com', '2010']
('precision avg', 0.6597499715250379)
['thenation.com', '2005']
('precision avg', 0.7599538973666159)
['entertainment.msn.com', '2010']
('precision avg', 0.3236012443738165)
['entertainment.msn.com', '2015']
('precision avg', 0.06974145722281835)
['www.esquire.com', '2015']
('precision avg', 0.06628174494498319)
['thenation.com', '2000']
('precision avg', 0.8369926820075588)
['www.nymag.com', '2005']
('precision avg', 0.5302978779309909)
['www.esquire.com', '2005']
('precision avg', 0.8564583004210408)
>>> avg_results = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val)}}
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> avg_results['entertainment.msn.com']
24: {'2015': {'avg_precision': 0.06974145722281835}}
>>> avg_results['news.bbc.co.uk']
25: {'2000': {'avg_precision': 0.6426368875256027}}
>>> avg_results = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> avg_results['news.bbc.co.uk']
26: {'2000': {'avg_precision': 0.6426368875256027},
'2005': {'avg_precision': 0.6212194798442074},
'2010': {'avg_precision': 0.35022956666455196},
'2015': {'avg_precision': 0.12744554000908173}}
>>> extractors_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... extractor_avgs[extractor] = avg_results
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.43949
Traceback (most recent call last):
File "<pyshell#89>", line 18, in <module>
extractor_avgs[extractor] = avg_results
NameError: name 'extractor_avgs' is not defined
08403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
[About 45 more lines. Double-click to unfold]
>>> extractors_avgs[0]
Traceback (most recent call last):
File "<pyshell#90>", line 1, in <module>
extractors_avgs[0]
KeyError: 0
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... extractor_avgs[extractor] = avg_results
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs['BodyTextExtractor2Filter']
27: {'entertainment.msn.com': {'2000': {'avg_precision': 0.8464097205988462},
'2005': {'avg_precision': 0.4417019947232166},
'2010': {'avg_precision': 0.406078738764565},
'2015': {'avg_precision': 0.07401168704789596}},
'news.bbc.co.uk': {'2000': {'avg_precision': 0.8919253176470745},
'2005': {'avg_precision': 0.6766215452483197},
'2010': {'avg_precision': 0.41742126841582045},
'2015': {'avg_precision': 0.13308241469189427}},
'news.yahoo.com': {'2000': {'avg_precision': 0.8683999024519955},
'2005': {'avg_precision': 0.6484059381381134},
'2010': {'avg_precision': 0.578675352037152},
'2015': {'avg_precision': 0.018386000373501205}},
'thenation.com': {'2000': {'avg_precision': 0.8797908423638537},
'2005': {'avg_precision': 0.7906462969568482},
'2010': {'avg_precision': 0.4915998914598782},
'2015': {'avg_precision': 0.5021943230481295}},
'www.cnn.com': {'2000': {'avg_precision': 0.7057985767079694},
'2005': {'avg_precision': 0.6211738688863229},
'2010': {'avg_precision': 0.4320541005367761},
'2015': {'avg_precision': 0.3029125013356543}},
'www.esquire.com': {'2000': {'avg_precision': 0.8838432933187248},
'2005': {'avg_precision': 0.926445902055539},
'2010': {'avg_precision': 0.350690725741593},
'2015': {'avg_precision': 0.06668416878257147}},
'www.forbes.com': {'2000': {'avg_precision': 0.672786168820609},
'2005': {'avg_precision': 0.7088740876453605},
'2010': {'avg_precision': 0.660445014823463},
'2015': {'avg_precision': 0.3484809556323725}},
'www.foxnews.com': {'2000': {'avg_precision': 0.8753434627782866},
'2005': {'avg_precision': 0.3387610582651177},
'2010': {'avg_precision': 0.4369663029782896},
'2015': {'avg_precision': 0.7543074627097117}},
'www.latimes.com': {'2000': {'avg_precision': 0.8572444640811792},
'2005': {'avg_precision': 0.6286822306460558},
'2010': {'avg_precision': 0.4394908403162711},
'2015': {'avg_precision': 0.12218070191163449}},
'www.nymag.com': {'2000': {'avg_precision': 0.855375126938793},
'2005': {'avg_precision': 0.63792601378758},
'2010': {'avg_precision': 0.12117487487387163},
'2015': {'avg_precision': 0.04347666120043913}}}
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... extractor_avgs[extractor] = avg_results
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... extractor_avgs[extractor] = avg_results
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs['BodyTextExtractor2Filter']
28: {'entertainment.msn.com': {'2000': {'avg_accuracy': 0.9244090539071942,
'avg_precision': 0.8464097205988462},
'2005': {'avg_accuracy': 0.6483814493886327,
'avg_precision': 0.4417019947232166},
'2010': {'avg_accuracy': 0.5739534002357349,
'avg_precision': 0.406078738764565},
'2015': {'avg_accuracy': 0.5518234073818731,
'avg_precision': 0.07401168704789596}},
'news.bbc.co.uk': {'2000': {'avg_accuracy': 0.9021006354735241,
'avg_precision': 0.8919253176470745},
'2005': {'avg_accuracy': 0.8099502557304525,
'avg_precision': 0.6766215452483197},
'2010': {'avg_accuracy': 0.73532761388079,
'avg_precision': 0.41742126841582045},
'2015': {'avg_accuracy': 0.5565193185460882,
'avg_precision': 0.13308241469189427}},
'news.yahoo.com': {'2000': {'avg_accuracy': 0.8906547313136641,
'avg_precision': 0.8683999024519955},
'2005': {'avg_accuracy': 0.7924984710068734,
'avg_precision': 0.6484059381381134},
'2010': {'avg_accuracy': 0.7999153102686466,
'avg_precision': 0.578675352037152},
'2015': {'avg_accuracy': 0.3219588516463774,
'avg_precision': 0.018386000373501205}},
'thenation.com': {'2000': {'avg_accuracy': 0.8954149201859337,
'avg_precision': 0.8797908423638537},
'2005': {'avg_accuracy': 0.804806209394557,
'avg_precision': 0.7906462969568482},
'2010': {'avg_accuracy': 0.6740374139494874,
'avg_precision': 0.4915998914598782},
'2015': {'avg_accuracy': 0.6543885449100943,
'avg_precision': 0.5021943230481295}},
'www.cnn.com': {'2000': {'avg_accuracy': 0.800733747469661,
'avg_precision': 0.7057985767079694},
'2005': {'avg_accuracy': 0.8096439361653318,
'avg_precision': 0.6211738688863229},
'2010': {'avg_accuracy': 0.6084899901455145,
'avg_precision': 0.4320541005367761},
'2015': {'avg_accuracy': 0.40317409919155567,
'avg_precision': 0.3029125013356543}},
'www.esquire.com': {'2000': {'avg_accuracy': 0.9074614289477201,
'avg_precision': 0.8838432933187248},
'2005': {'avg_accuracy': 0.9454220019060875,
'avg_precision': 0.926445902055539},
'2010': {'avg_accuracy': 0.5462068777107696,
'avg_precision': 0.350690725741593},
'2015': {'avg_accuracy': 0.33177659602885284,
'avg_precision': 0.06668416878257147}},
'www.forbes.com': {'2000': {'avg_accuracy': 0.7794584406932229,
'avg_precision': 0.672786168820609},
'2005': {'avg_accuracy': 0.9168198868029921,
'avg_precision': 0.7088740876453605},
'2010': {'avg_accuracy': 0.8116505246063985,
'avg_precision': 0.660445014823463},
'2015': {'avg_accuracy': 0.5365559659335577,
'avg_precision': 0.3484809556323725}},
'www.foxnews.com': {'2000': {'avg_accuracy': 0.9075440551901525,
'avg_precision': 0.8753434627782866},
'2005': {'avg_accuracy': 0.49737059246242404,
'avg_precision': 0.3387610582651177},
'2010': {'avg_accuracy': 0.744614672197503,
'avg_precision': 0.4369663029782896},
'2015': {'avg_accuracy': 0.8435262483444723,
'avg_precision': 0.7543074627097117}},
'www.latimes.com': {'2000': {'avg_accuracy': 0.8802343747738629,
'avg_precision': 0.8572444640811792},
'2005': {'avg_accuracy': 0.7569839835615282,
'avg_precision': 0.6286822306460558},
'2010': {'avg_accuracy': 0.681661606253343,
'avg_precision': 0.4394908403162711},
'2015': {'avg_accuracy': 0.4928797528462945,
'avg_precision': 0.12218070191163449}},
'www.nymag.com': {'2000': {'avg_accuracy': 0.8992183949804482,
'avg_precision': 0.855375126938793},
'2005': {'avg_accuracy': 0.7500215514070161,
'avg_precision': 0.63792601378758},
'2010': {'avg_accuracy': 0.3941642691060131,
'avg_precision': 0.12117487487387163},
'2015': {'avg_accuracy': 0.163241251572618,
'avg_precision': 0.04347666120043913}}}
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... extractor_avgs[extractor] = avg_results
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs['BodyTextExtractor2Filter']
29: {'entertainment.msn.com': {'2000': {'avg_accuracy': 0.9244090539071942,
'avg_f1': 0.846137936359689,
'avg_precision': 0.8464097205988462,
'avg_recall': 0.8503924945577224},
'2005': {'avg_accuracy': 0.6483814493886327,
'avg_f1': 0.5736756061820752,
'avg_precision': 0.4417019947232166,
'avg_recall': 0.8815812072871462},
'2010': {'avg_accuracy': 0.5739534002357349,
'avg_f1': 0.5096672963521531,
'avg_precision': 0.406078738764565,
'avg_recall': 0.8837208661044244},
'2015': {'avg_accuracy': 0.5518234073818731,
'avg_f1': 0.1322736889940892,
'avg_precision': 0.07401168704789596,
'avg_recall': 0.8043228823612675}},
'news.bbc.co.uk': {'2000': {'avg_accuracy': 0.9021006354735241,
'avg_f1': 0.8647821118015534,
'avg_precision': 0.8919253176470745,
'avg_recall': 0.8401801928987451},
'2005': {'avg_accuracy': 0.8099502557304525,
'avg_f1': 0.7565121769831742,
'avg_precision': 0.6766215452483197,
'avg_recall': 0.8715406400104121},
'2010': {'avg_accuracy': 0.73532761388079,
'avg_f1': 0.5205747113910343,
'avg_precision': 0.41742126841582045,
'avg_recall': 0.7303742669478774},
'2015': {'avg_accuracy': 0.5565193185460882,
'avg_f1': 0.22151052512251035,
'avg_precision': 0.13308241469189427,
'avg_recall': 0.7304439153776018}},
'news.yahoo.com': {'2000': {'avg_accuracy': 0.8906547313136641,
'avg_f1': 0.8654332302102575,
'avg_precision': 0.8683999024519955,
'avg_recall': 0.8638322817894927},
'2005': {'avg_accuracy': 0.7924984710068734,
'avg_f1': 0.7383258111263864,
'avg_precision': 0.6484059381381134,
'avg_recall': 0.8848410469684321},
'2010': {'avg_accuracy': 0.7999153102686466,
'avg_f1': 0.6558914318984841,
'avg_precision': 0.578675352037152,
'avg_recall': 0.7757993179137981},
'2015': {'avg_accuracy': 0.3219588516463774,
'avg_f1': 0.03564246563861722,
'avg_precision': 0.018386000373501205,
'avg_recall': 0.686399968694185}},
'thenation.com': {'2000': {'avg_accuracy': 0.8954149201859337,
'avg_f1': 0.918658715490274,
'avg_precision': 0.8797908423638537,
'avg_recall': 0.9652013434871509},
'2005': {'avg_accuracy': 0.804806209394557,
'avg_f1': 0.8561489426227454,
'avg_precision': 0.7906462969568482,
'avg_recall': 0.9495652685159028},
'2010': {'avg_accuracy': 0.6740374139494874,
'avg_f1': 0.6136605471301677,
'avg_precision': 0.4915998914598782,
'avg_recall': 0.867987776825755},
'2015': {'avg_accuracy': 0.6543885449100943,
'avg_f1': 0.6190670055430223,
'avg_precision': 0.5021943230481295,
'avg_recall': 0.8572473843198571}},
'www.cnn.com': {'2000': {'avg_accuracy': 0.800733747469661,
'avg_f1': 0.7688012107244725,
'avg_precision': 0.7057985767079694,
'avg_recall': 0.864311328238917},
'2005': {'avg_accuracy': 0.8096439361653318,
'avg_f1': 0.7071877225133284,
'avg_precision': 0.6211738688863229,
'avg_recall': 0.8286779162880316},
'2010': {'avg_accuracy': 0.6084899901455145,
'avg_f1': 0.5400160577031479,
'avg_precision': 0.4320541005367761,
'avg_recall': 0.8017286221537236},
'2015': {'avg_accuracy': 0.40317409919155567,
'avg_f1': 0.4324038600619751,
'avg_precision': 0.3029125013356543,
'avg_recall': 0.865448187557186}},
'www.esquire.com': {'2000': {'avg_accuracy': 0.9074614289477201,
'avg_f1': 0.8757034024592896,
'avg_precision': 0.8838432933187248,
'avg_recall': 0.8852199980453179},
'2005': {'avg_accuracy': 0.9454220019060875,
'avg_f1': 0.9306781924884957,
'avg_precision': 0.926445902055539,
'avg_recall': 0.9361114008251437},
'2010': {'avg_accuracy': 0.5462068777107696,
'avg_f1': 0.4820745041640184,
'avg_precision': 0.350690725741593,
'avg_recall': 0.863806557191218},
'2015': {'avg_accuracy': 0.33177659602885284,
'avg_f1': 0.11513009883762236,
'avg_precision': 0.06668416878257147,
'avg_recall': 0.7739857956252141}},
'www.forbes.com': {'2000': {'avg_accuracy': 0.7794584406932229,
'avg_f1': 0.7625513206364645,
'avg_precision': 0.672786168820609,
'avg_recall': 0.9016544964695659},
'2005': {'avg_accuracy': 0.9168198868029921,
'avg_f1': 0.7443993511594685,
'avg_precision': 0.7088740876453605,
'avg_recall': 0.7883061719247125},
'2010': {'avg_accuracy': 0.8116505246063985,
'avg_f1': 0.7375315583874413,
'avg_precision': 0.660445014823463,
'avg_recall': 0.8718069249651257},
'2015': {'avg_accuracy': 0.5365559659335577,
'avg_f1': 0.48072279957902986,
'avg_precision': 0.3484809556323725,
'avg_recall': 0.8283171986084517}},
'www.foxnews.com': {'2000': {'avg_accuracy': 0.9075440551901525,
'avg_f1': 0.8874059086393135,
'avg_precision': 0.8753434627782866,
'avg_recall': 0.9260015291417278},
'2005': {'avg_accuracy': 0.49737059246242404,
'avg_f1': 0.48511616080858905,
'avg_precision': 0.3387610582651177,
'avg_recall': 0.9414502698019317},
'2010': {'avg_accuracy': 0.744614672197503,
'avg_f1': 0.5517263659638048,
'avg_precision': 0.4369663029782896,
'avg_recall': 0.8049231112015547},
'2015': {'avg_accuracy': 0.8435262483444723,
'avg_f1': 0.7897416404006864,
'avg_precision': 0.7543074627097117,
'avg_recall': 0.8328436054890792}},
'www.latimes.com': {'2000': {'avg_accuracy': 0.8802343747738629,
'avg_f1': 0.8972320848398079,
'avg_precision': 0.8572444640811792,
'avg_recall': 0.9430623481664986},
'2005': {'avg_accuracy': 0.7569839835615282,
'avg_f1': 0.7284191666091211,
'avg_precision': 0.6286822306460558,
'avg_recall': 0.894640252155387},
'2010': {'avg_accuracy': 0.681661606253343,
'avg_f1': 0.5428877399677827,
'avg_precision': 0.4394908403162711,
'avg_recall': 0.8076022084890088},
'2015': {'avg_accuracy': 0.4928797528462945,
'avg_f1': 0.21086178590786336,
'avg_precision': 0.12218070191163449,
'avg_recall': 0.8331923688617182}},
'www.nymag.com': {'2000': {'avg_accuracy': 0.8992183949804482,
'avg_f1': 0.8825630519989532,
'avg_precision': 0.855375126938793,
'avg_recall': 0.9395867057441535},
'2005': {'avg_accuracy': 0.7500215514070161,
'avg_f1': 0.7409493649922384,
'avg_precision': 0.63792601378758,
'avg_recall': 0.8938914698626994},
'2010': {'avg_accuracy': 0.3941642691060131,
'avg_f1': 0.19293172049444848,
'avg_precision': 0.12117487487387163,
'avg_recall': 0.6389370425852912},
'2015': {'avg_accuracy': 0.163241251572618,
'avg_f1': 0.06427847328424839,
'avg_precision': 0.04347666120043913,
'avg_recall': 0.5388911459138439}}}
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... extractor_avgs[extractor] = avg_results
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
[About 25 more lines. Double-click to unfold]
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... extractor_avgs[extractor] = avg_results
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
[About 27 more lines. Double-click to unfold]
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... for key,val in avg_results:
... pass
...
... extractor_avgs[extractor] = avg_results
...
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.43949
Traceback (most recent call last):
File "<pyshell#100>", line 24, in <module>
for key,val in avg_results:
ValueError: too many values to unpack
08403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
... print("precision avg", sum([d['p'] for d in val])/len(val))
...
... for key,val in avg_results.items():
... pass
...
... extractor_avgs[extractor] = avg_results
...
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs = {}  # extractor name -> averages dict; NOTE: block indentation was lost in this transcript
... for basepath,trimmed in trimmed_results:  # trimmed_results comes from earlier in the session (not shown here)
... print(basepath)
... extractor = os.path.split(basepath)[-1]  # last path component is used as the extractor name
... # avg_results layout: domain -> year -> dict with avg_precision, avg_recall, avg_f1, avg_accuracy
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")  # each key is split into a domain part and a year part
... # Both branches store the same four averages; they differ only in whether the domain entry already exists.
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
... # Every average divides by len(val), so an empty val would raise ZeroDivisionError.
... # This session runs Python 2.7, where the parenthesised print below emits a tuple; hence the tuple-shaped output lines.
... print("precision avg", sum([d['p'] for d in val])/len(val))
... # NOTE(review): val[year] below is the whole per-metric dict, so sum() raises the TypeError recorded in the traceback after this input.
... avg_results['mean_avgs'] = {year:sum([ val[year] for key,val in avg_results.items() for year in ['2000','2005','2010','2015']])}
... # NOTE(review): the dict above has a single key, so it can hold one year at most, and nothing divides the sum by a count.
...
... extractor_avgs[extractor] = avg_results
... # NOTE(review): the file objects handed to pickle.dump are opened inline and never closed explicitly.
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))  # per-extractor results saved as avgs.pkl under basepath
... # presumably the final dump runs once after the outer loop (indentation lost) - TODO confirm
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.43949
Traceback (most recent call last):
File "<pyshell#102>", line 24, in <module>
avg_results['mean_avgs'] = {year:sum([ val[year] for key,val in avg_results.items() for year in ['2000','2005','2010','2015']])}
TypeError: unsupported operand type(s) for +: 'int' and 'dict'
08403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
>>> extractor_avgs = {}  # second attempt: only the mean_avgs line differs from the previous input
... for basepath,trimmed in trimmed_results:  # pairs of (result directory, mapping of results); indentation lost in transcript
... print(basepath)
... extractor = os.path.split(basepath)[-1]  # extractor name is the last component of basepath
... # avg_results is rebuilt from scratch for every extractor
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")  # key carries domain and year separated by a semicolon
... # First sighting of a domain creates its dict; later years are added to the existing one.
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
... # val is a sequence of dicts read through the keys p, r, f1 and a; each metric is averaged over that sequence.
... # Progress output: one line per (domain, year) entry with its mean precision.
... print("precision avg", sum([d['p'] for d in val])/len(val))
... # NOTE(review): wrapping the result under the key p does not help; val[year] is still a dict, so sum() fails as the following traceback shows.
... avg_results['mean_avgs'] = {'p':{year:sum([ val[year] for key,val in avg_results.items() for year in ['2000','2005','2010','2015']])}}
... # NOTE(review): the inner dict still has a single year key and the sum is never divided by a count.
...
... extractor_avgs[extractor] = avg_results
... # NOTE(review): neither open() call below is paired with a close or a with-block.
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))  # writes avgs.pkl inside the extractor directory
... # presumably executed once after all extractors are processed (indentation lost) - TODO confirm
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.43949
Traceback (most recent call last):
File "<pyshell#103>", line 24, in <module>
avg_results['mean_avgs'] = {'p':{year:sum([ val[year] for key,val in avg_results.items() for year in ['2000','2005','2010','2015']])}}
TypeError: unsupported operand type(s) for +: 'int' and 'dict'
08403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
>>> extractor_avgs = {}  # third attempt: mean_avgs now reads the avg_precision field, so the input runs to completion
... for basepath,trimmed in trimmed_results:  # indentation of the loop bodies was lost in this transcript
... print(basepath)
... extractor = os.path.split(basepath)[-1]  # directory name doubles as the extractor name
... # Per-extractor accumulator: domain -> year -> four averaged metrics
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")  # two-part key: domain, then year
... # The two branches compute identical values; only the insertion into avg_results differs.
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
... # An empty val would make len(val) zero and the divisions above would raise ZeroDivisionError.
... # Python 2 print with parentheses shows a tuple, which is the shape of the output lines recorded after this input.
... print("precision avg", sum([d['p'] for d in val])/len(val))
... # NOTE(review): this adds avg_precision over every domain and all four years into ONE number and never divides it.
... avg_results['mean_avgs'] = {'p':{year:sum([ val[year]['avg_precision'] for key,val in avg_results.items() for year in ['2000','2005','2010','2015']])}}
... # NOTE(review): the result is stored under one year key only; the session later shows a single 2015 entry with value 21.45..., i.e. a sum, not a mean.
... # NOTE(review): val[year] raises KeyError if any domain lacks one of the four listed years.
... extractor_avgs[extractor] = avg_results
... # NOTE(review): the files opened for pickle.dump are not closed explicitly.
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))  # one avgs.pkl per extractor directory
... # presumably a single combined dump after the outer loop (indentation lost) - TODO confirm
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
('precision avg', 0.8683999024519955)
('precision avg', 0.6211738688863229)
('precision avg', 0.6484059381381134)
('precision avg', 0.7057985767079694)
('precision avg', 0.350690725741593)
('precision avg', 0.04347666120043913)
('precision avg', 0.6286822306460558)
('precision avg', 0.8572444640811792)
('precision avg', 0.8838432933187248)
('precision avg', 0.4369663029782896)
('precision avg', 0.7543074627097117)
('precision avg', 0.672786168820609)
('precision avg', 0.13308241469189427)
('precision avg', 0.7088740876453605)
('precision avg', 0.41742126841582045)
('precision avg', 0.8464097205988462)
('precision avg', 0.4417019947232166)
('precision avg', 0.5021943230481295)
('precision avg', 0.4915998914598782)
('precision avg', 0.578675352037152)
('precision avg', 0.018386000373501205)
('precision avg', 0.4320541005367761)
('precision avg', 0.3029125013356543)
('precision avg', 0.12218070191163449)
('precision avg', 0.4394908403162711)
('precision avg', 0.8753434627782866)
('precision avg', 0.3387610582651177)
('precision avg', 0.855375126938793)
('precision avg', 0.6766215452483197)
('precision avg', 0.3484809556323725)
('precision avg', 0.8919253176470745)
('precision avg', 0.12117487487387163)
('precision avg', 0.660445014823463)
('precision avg', 0.7906462969568482)
('precision avg', 0.406078738764565)
('precision avg', 0.07401168704789596)
('precision avg', 0.06668416878257147)
('precision avg', 0.8797908423638537)
('precision avg', 0.63792601378758)
('precision avg', 0.926445902055539)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
('precision avg', 0.6850494294232871)
('precision avg', 0.6040764658910175)
('precision avg', 0.553762919588722)
('precision avg', 0.6767895607401481)
('precision avg', 0.33316897330572265)
('precision avg', 0.045097010038526314)
('precision avg', 0.6083888210659326)
('precision avg', 0.9250015482359525)
('precision avg', 0.8413135303544912)
('precision avg', 0.30196347934410317)
('precision avg', 0.5811714938921815)
('precision avg', 0.6534683899153207)
('precision avg', 0.11640002710415726)
('precision avg', 0.3535572901091079)
('precision avg', 0.3041230830149431)
('precision avg', 0.7786809001887803)
('precision avg', 0.4186477840737833)
('precision avg', 0.46860182851819593)
('precision avg', 0.4313543594309007)
('precision avg', 0.31975003566238014)
('precision avg', 0.018336170825126605)
('precision avg', 0.33209885398619127)
('precision avg', 0.29940846505010726)
('precision avg', 0.12272875317104835)
('precision avg', 0.4017566608588345)
('precision avg', 0.7997548741274138)
('precision avg', 0.5437722450092386)
('precision avg', 0.8357303273376807)
('precision avg', 0.5177519581452089)
('precision avg', 0.2791851517078164)
('precision avg', 0.5729001117183127)
('precision avg', 0.1207055620925438)
('precision avg', 0.5461973136131762)
('precision avg', 0.7305810594010783)
('precision avg', 0.34039318503958943)
('precision avg', 0.07007420541341315)
('precision avg', 0.06624818281736693)
('precision avg', 0.8673455478551348)
('precision avg', 0.563772151455564)
('precision avg', 0.9210595620529616)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
('precision avg', 0.7085868798180318)
('precision avg', 0.5452230974470502)
('precision avg', 0.5720537517102889)
('precision avg', 0.5996675436639709)
('precision avg', 0.35502210934852385)
('precision avg', 0.044054500032989705)
('precision avg', 0.5986065482125335)
('precision avg', 0.8111021925809357)
('precision avg', 0.8605989019941799)
('precision avg', 0.33226391278577666)
('precision avg', 0.5507998052113364)
('precision avg', 0.6396386293000841)
('precision avg', 0.11919826889537567)
('precision avg', 0.46550867158986875)
('precision avg', 0.3116620725542801)
('precision avg', 0.6700112446095992)
('precision avg', 0.41877132883633295)
('precision avg', 0.45609212979185026)
('precision avg', 0.45163668180184763)
('precision avg', 0.3842242600405546)
('precision avg', 0.018210151381855617)
('precision avg', 0.3597736757896344)
('precision avg', 0.2894186892399318)
('precision avg', 0.11616947582549013)
('precision avg', 0.42023599354375335)
('precision avg', 0.8038458494347899)
('precision avg', 0.3258139976914447)
('precision avg', 0.8370761752694753)
('precision avg', 0.5984474642761501)
('precision avg', 0.2957366925768594)
('precision avg', 0.6203488498249787)
('precision avg', 0.12616883801360262)
('precision avg', 0.6366358379063284)
('precision avg', 0.7878965878803256)
('precision avg', 0.31730289839466586)
('precision avg', 0.07134534027116624)
('precision avg', 0.06563692613337585)
('precision avg', 0.8883697815371147)
('precision avg', 0.5591489354125457)
('precision avg', 0.8962966109245787)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
('precision avg', 0.845171687583807)
('precision avg', 0.6138419519432511)
('precision avg', 0.6604807297526994)
('precision avg', 0.814601457327025)
('precision avg', 0.4108209241176574)
('precision avg', 0.042433386182753455)
('precision avg', 0.62804991992724)
('precision avg', 0.8608073651801277)
('precision avg', 0.8958452423944537)
('precision avg', 0.4429766402972281)
('precision avg', 0.7578331189482941)
('precision avg', 0.6696646303443038)
('precision avg', 0.13348385856834152)
('precision avg', 0.6725911369907592)
('precision avg', 0.41579101137917696)
('precision avg', 0.8307937669693192)
('precision avg', 0.4385329315775029)
('precision avg', 0.5045095303547008)
('precision avg', 0.5713283942566657)
('precision avg', 0.5688487055711114)
('precision avg', 0.018287140557954475)
('precision avg', 0.4253522345143547)
('precision avg', 0.31429130410851036)
('precision avg', 0.13362793010722546)
('precision avg', 0.4775014374694188)
('precision avg', 0.8939153820029523)
('precision avg', 0.3291739634988008)
('precision avg', 0.8780918332144314)
('precision avg', 0.8696766652605129)
('precision avg', 0.34104189997176576)
('precision avg', 0.9269321571721406)
('precision avg', 0.19291701995638091)
('precision avg', 0.6700458372321113)
('precision avg', 0.9004401876670914)
('precision avg', 0.4615173559036399)
('precision avg', 0.07619402011443083)
('precision avg', 0.06688037449997622)
('precision avg', 0.9264909687190436)
('precision avg', 0.647029606678659)
('precision avg', 0.92854392161831)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
('precision avg', 0.42620741281304775)
('precision avg', 0.13467802471805546)
('precision avg', 0.19753138142997795)
('precision avg', 0.36240339186286513)
('precision avg', 0.07315564039653401)
('precision avg', 0.007736638561868903)
('precision avg', 0.5836816702543528)
('precision avg', 0.3108694782857746)
('precision avg', 0.36353215206468886)
('precision avg', 0.04998551810329787)
('precision avg', 0.375579362581543)
('precision avg', 0.19561991680653532)
('precision avg', 0.03774622737251699)
('precision avg', 0.3108646536464451)
('precision avg', 0.060235937357474324)
('precision avg', 0.4101324917837275)
('precision avg', 0.13401296466303134)
('precision avg', 0.05919721060422903)
('precision avg', 0.20247773008213288)
('precision avg', 0.17931260635945423)
('precision avg', 0.013623785917849206)
('precision avg', 0.14134634671225613)
('precision avg', 0.06418395143820504)
('precision avg', 0.03862377662399559)
('precision avg', 0.12564869282111016)
('precision avg', 0.6306054822791173)
('precision avg', 0.08660519833502513)
('precision avg', 0.6510800190870829)
('precision avg', 0.42373560283340894)
('precision avg', 0.09245746028554677)
('precision avg', 0.48511903626399655)
('precision avg', 0.10996307421215494)
('precision avg', 0.26944679283076217)
('precision avg', 0.36475352093750507)
('precision avg', 0.1298783825779457)
('precision avg', 0.01665606784896567)
('precision avg', 0.03007502159405593)
('precision avg', 0.5940573521660849)
('precision avg', 0.29333271544410583)
('precision avg', 0.5122452215303767)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
('precision avg', 0.20901849585870255)
('precision avg', 0.059986740214574584)
('precision avg', 0.10767432608632829)
('precision avg', 0.257515252955112)
('precision avg', 0.04548066527479219)
('precision avg', 0.026359227184905656)
('precision avg', 0.05359185225411894)
('precision avg', 0.259452776657885)
('precision avg', 0.08509745093616063)
('precision avg', 0.05581275411375033)
('precision avg', 0.4335891319208261)
('precision avg', 0.06402753069155678)
('precision avg', 0.0343743719528452)
('precision avg', 0.05855408531900379)
('precision avg', 0.030801142370060597)
('precision avg', 0.1207254953634531)
('precision avg', 0.022684054643112104)
('precision avg', 0.04517924477576478)
('precision avg', 0.17286342176717917)
('precision avg', 0.13330163214667995)
('precision avg', 0.01360822373383575)
('precision avg', 0.10473888905032128)
('precision avg', 0.06889188795616198)
('precision avg', 0.042838971662592956)
('precision avg', 0.09076425485857424)
('precision avg', 0.19892154478650526)
('precision avg', 0.06539045075135168)
('precision avg', 0.7097155883500723)
('precision avg', 0.17945710895699843)
('precision avg', 0.08303484471116346)
('precision avg', 0.20720090593774804)
('precision avg', 0.044021800471251676)
('precision avg', 0.12450890050980233)
('precision avg', 0.3682191437488494)
('precision avg', 0.048415999768495664)
('precision avg', 0.019098335185455066)
('precision avg', 0.029511317676966412)
('precision avg', 0.13596091613435268)
('precision avg', 0.06628139954248631)
('precision avg', 0.15894624504628735)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
('precision avg', 0.8087223234480646)
('precision avg', 0.6246431645192004)
('precision avg', 0.6564277365163304)
('precision avg', 0.8548475040578897)
('precision avg', 0.3866449923138486)
('precision avg', 0.04407018165599923)
('precision avg', 0.6305525928335406)
('precision avg', 0.8586431262759081)
('precision avg', 0.9094951163901436)
('precision avg', 0.43042619558685963)
('precision avg', 0.7143877357447221)
('precision avg', 0.6714638467737778)
('precision avg', 0.13167702125754785)
('precision avg', 0.6347119061510296)
('precision avg', 0.4218849025093671)
('precision avg', 0.8394696101599314)
('precision avg', 0.41909133109112984)
('precision avg', 0.49137907199335934)
('precision avg', 0.5434962294760681)
('precision avg', 0.5255237109769181)
('precision avg', 0.018207304858120263)
('precision avg', 0.41096334652475897)
('precision avg', 0.2995482008650643)
('precision avg', 0.12943625842051695)
('precision avg', 0.46851571662688785)
('precision avg', 0.8730005208941793)
('precision avg', 0.32624183171026927)
('precision avg', 0.87037309891489)
('precision avg', 0.8903295054227871)
('precision avg', 0.3247401991305493)
('precision avg', 0.9404262665672861)
('precision avg', 0.18612609604198724)
('precision avg', 0.6832190050181055)
('precision avg', 0.8503041167751857)
('precision avg', 0.3885914367069556)
('precision avg', 0.07223171179425797)
('precision avg', 0.06655548974343942)
('precision avg', 0.9018062702104928)
('precision avg', 0.6187842974937978)
('precision avg', 0.9281173744128661)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
('precision avg', 0.624509586529224)
('precision avg', 0.5211798048500964)
('precision avg', 0.47413698373950586)
('precision avg', 0.7639870392099527)
('precision avg', 0.30168348382925414)
('precision avg', 0.02016864363656134)
('precision avg', 0.5780249782557191)
('precision avg', 0.8533664269391111)
('precision avg', 0.7898786198862732)
('precision avg', 0.2962800934568883)
('precision avg', 0.6090391904597992)
('precision avg', 0.5711755450519884)
('precision avg', 0.12364045968228651)
('precision avg', 0.3897890357149691)
('precision avg', 0.4059495413065444)
('precision avg', 0.5947838565979695)
('precision avg', 0.3002877518789036)
('precision avg', 0.4305324047938475)
('precision avg', 0.43766238053965867)
('precision avg', 0.44113596091377016)
('precision avg', 0.01726963014185236)
('precision avg', 0.40366920898565617)
('precision avg', 0.2791625929367795)
('precision avg', 0.09226749054973236)
('precision avg', 0.3291085018457033)
('precision avg', 0.7523577774183531)
('precision avg', 0.2933510531370516)
('precision avg', 0.7789060527515866)
('precision avg', 0.4781585844754011)
('precision avg', 0.27262554887522134)
('precision avg', 0.8106990701712056)
('precision avg', 0.17948716199317488)
('precision avg', 0.46850561527616924)
('precision avg', 0.7429835013444328)
('precision avg', 0.26444680402070014)
('precision avg', 0.06978700649645515)
('precision avg', 0.06346258311336817)
('precision avg', 0.88178763991206)
('precision avg', 0.5501415443668539)
('precision avg', 0.6723481555542996)
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
('precision avg', 0.582312988541893)
('precision avg', 0.5193456459563168)
('precision avg', 0.5907695245619571)
('precision avg', 0.6515399864573219)
('precision avg', 0.37115173971617904)
('precision avg', 0.043716349444529276)
('precision avg', 0.595289193234168)
('precision avg', 0.7738884403357201)
('precision avg', 0.7644592804937886)
('precision avg', 0.3128787204252841)
('precision avg', 0.5532927097447694)
('precision avg', 0.5907915319816335)
('precision avg', 0.12744554000908173)
('precision avg', 0.440919472040343)
('precision avg', 0.35022956666455196)
('precision avg', 0.7026935512166758)
('precision avg', 0.408552851894306)
('precision avg', 0.49478343796012586)
('precision avg', 0.46639164402721606)
('precision avg', 0.42375700219940554)
('precision avg', 0.018395332008463647)
('precision avg', 0.34478378415001126)
('precision avg', 0.29387452934098823)
('precision avg', 0.12303070023234008)
('precision avg', 0.4339480725003696)
('precision avg', 0.784355741068858)
('precision avg', 0.3340192053349481)
('precision avg', 0.7817656558124935)
('precision avg', 0.6212194798442074)
('precision avg', 0.3002309261235297)
('precision avg', 0.6426368875256027)
('precision avg', 0.18114137356170518)
('precision avg', 0.6597499715250379)
('precision avg', 0.7599538973666159)
('precision avg', 0.3236012443738165)
('precision avg', 0.06974145722281835)
('precision avg', 0.06628174494498319)
('precision avg', 0.8369926820075588)
('precision avg', 0.5302978779309909)
('precision avg', 0.8564583004210408)
[About 368 more lines. Double-click to unfold]
>>> extractor_avgs['BodyTextExtractor2Filter']['mean_avgs']
30: {'p': {'2015': 21.45646979874129}}
>>> extractor_avgs = {}  # fourth attempt: mean_avgs extended to four metrics; the per-entry print was dropped
... for basepath,trimmed in trimmed_results:  # loop-body indentation was lost in this transcript
... print(basepath)
... extractor = os.path.split(basepath)[-1]  # final path component names the extractor
... # avg_results: domain -> year -> averaged precision, recall, f1 and accuracy
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")  # key is split into domain and year
... # Create the domain entry on first use, otherwise add the year to the existing entry.
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
... # Each average is sum over len(val); an empty val would raise ZeroDivisionError.
... # NOTE(review): every entry of mean_avgs below is a plain sum over all domains and all four years, with no division by a count.
... # NOTE(review): each inner dict has exactly one year key; the output recorded later in the session shows only 2015, with values above 1.
... # NOTE(review): val[year] raises KeyError if a domain is missing any of the four listed years.
... avg_results['mean_avgs'] = {'p':{year:sum([ val[year]['avg_precision']
... for key,val in avg_results.items()
... for year in ['2000','2005','2010','2015']])},
... 'a':{year:sum([ val[year]['avg_accuracy']
... for key,val in avg_results.items()
... for year in ['2000','2005','2010','2015']])},
... 'r':{year:sum([ val[year]['avg_recall']
... for key,val in avg_results.items()
... for year in ['2000','2005','2010','2015']])},
... 'f1':{year:sum([ val[year]['avg_f1']
... for key,val in avg_results.items()
... for year in ['2000','2005','2010','2015']])}}
... # The whole right-hand side is evaluated before the assignment, so mean_avgs itself is not among the items being summed.
...
... extractor_avgs[extractor] = avg_results
... # NOTE(review): both pickle.dump calls below receive file objects that are never closed explicitly.
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))  # avgs.pkl is written into the extractor directory
... # presumably run once after the outer loop finishes (indentation lost) - TODO confirm
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
>>> extractor_avgs['BodyTextExtractor2Filter']['mean_avgs']
31: {'a': {'2015': 27.944993835517263},
'f1': {'2015': 24.279275745467846},
'p': {'2015': 21.45646979874129},
'r': {'2015': 33.64782750936527}}
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
...
...
...
... mean_avgs = {}
...
... for year in ['2000','2005','2010','2015']:
...
...
... print(sum([val[year]['avg_accuracy'] for key,val in avg_results.items()]))
...
...
...
... extractor_avgs[extractor] = avg_results
...
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
8.78722978294
7.73189833783
6.57002167835
4.8558440364
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
8.46813361278
7.22211307885
5.39206768991
4.40532661398
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
8.36836783293
7.35110905385
5.92780383316
4.42525463939
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
8.6201800902
7.7548146604
7.10890393272
5.01450602999
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
5.60397946754
5.59251030475
5.94899025303
4.18154454571
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
5.55923859422
4.87995427584
5.6452088604
4.19814077295
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
8.82594960601
7.77822967336
6.89466414497
4.87230406126
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
8.33412299258
7.07051587592
6.54022146689
4.77191363082
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
8.1494568227
7.22274134944
6.19353512271
4.52416747293
[About 44 more lines. Double-click to unfold]
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
...
...
...
... mean_avgs = {'a':{},'p':{},'r':{},'f1':{}}
...
... for year in ['2000','2005','2010','2015']:
...
...
... mean_avgs['a'].update(year,sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10)
...
...
...
... extractor_avgs[extractor] = avg_results
...
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
Traceback (most recent call last):
File "<pyshell#109>", line 30, in <module>
mean_avgs['a'].update(year,sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10)
TypeError: update expected at most 1 arguments, got 2
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
...
...
...
... mean_avgs = {'a':{},'p':{},'r':{},'f1':{}}
...
... for year in ['2000','2005','2010','2015']:
...
...
... mean_avgs['a'].update(year,sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10)
...
...
...
... extractor_avgs[extractor] = avg_results
...
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
Traceback (most recent call last):
File "<pyshell#110>", line 30, in <module>
mean_avgs['a'].update(year,sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10)
TypeError: update expected at most 1 arguments, got 2
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
...
...
...
... mean_avgs = {'a':{},'p':{},'r':{},'f1':{}}
...
... for year in ['2000','2005','2010','2015']:
...
...
... mean_avgs['a'].update([year,sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10])
...
...
...
... extractor_avgs[extractor] = avg_results
...
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
Traceback (most recent call last):
File "<pyshell#111>", line 30, in <module>
mean_avgs['a'].update([year,sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10])
ValueError: dictionary update sequence element #0 has length 4; 2 is required
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
...
...
...
... mean_avgs = {'a':{},'p':{},'r':{},'f1':{}}
...
... for year in ['2000','2005','2010','2015']:
...
...
... mean_avgs['a'].update({year:sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10})
...
...
...
... extractor_avgs[extractor] = avg_results
...
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
>>> mean_avgs
32: {'a': {'2000': 0.8149456822704948,
'2005': 0.7222741349443401,
'2010': 0.6193535122708822,
'2015': 0.4524167472934086},
'f1': {},
'p': {},
'r': {}}
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
...
...
...
... mean_avgs = {'a':{},'p':{},'r':{},'f1':{}}
...
... for year in ['2000','2005','2010','2015']:
...
...
... mean_avgs['a'].update({year:sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10})
... mean_avgs['p'].update({year:sum([val[year]['avg_precision'] for key,val in avg_results.items()])/10})
... mean_avgs['r'].update({year:sum([val[year]['avg_recall'] for key,val in avg_results.items()])/10})
... mean_avgs['f1'].update({year:sum([val[year]['avg_f1'] for key,val in avg_results.items()])/10})
...
...
...
... extractor_avgs[extractor] = avg_results
... extractor_avgs['mean_avgs'] = mean_avgs
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
>>> extractor_avgs = {}
... for basepath,trimmed in trimmed_results:
... print(basepath)
... extractor = os.path.split(basepath)[-1]
...
... avg_results = {}
... for key,val in trimmed.items():
... domain, year = key.split(";")
...
... if domain not in avg_results:
... avg_results[domain] = {year:{'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}}
... else:
... avg_results[domain][year] = {'avg_precision':sum([d['p'] for d in val])/len(val),
... 'avg_recall':sum([d['r'] for d in val])/len(val),
... 'avg_f1':sum([d['f1'] for d in val])/len(val),
... 'avg_accuracy':sum([d['a'] for d in val])/len(val)}
...
...
...
...
...
... mean_avgs = {'a':{},'p':{},'r':{},'f1':{}}
...
... for year in ['2000','2005','2010','2015']:
...
...
... mean_avgs['a'].update({year:sum([val[year]['avg_accuracy'] for key,val in avg_results.items()])/10})
... mean_avgs['p'].update({year:sum([val[year]['avg_precision'] for key,val in avg_results.items()])/10})
... mean_avgs['r'].update({year:sum([val[year]['avg_recall'] for key,val in avg_results.items()])/10})
... mean_avgs['f1'].update({year:sum([val[year]['avg_f1'] for key,val in avg_results.items()])/10})
...
...
...
... extractor_avgs[extractor] = avg_results
... extractor_avgs[extractor]['mean_avgs'] = mean_avgs
...
... pickle.dump(avg_results,open(os.path.join(basepath,'avgs.pkl'),'wb'))
...
... pickle.dump(extractor_avgs,open('c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests/extractor_avgs.pkl','wb'))
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractor2Filter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\BodyTextExtractorFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\ContentCodeBlurringFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\DocumentSlopeCurveFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\FeatureExtractorSplitFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\GeneralCCB
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\KFeatureExtractorDomFilter
c:/crawlToTheFuture/crawl-to-the-future/testing/wbce-tests\LinkQuotaFilter
>>> extractor_avgs['BodyTextExtractor2Filter']['mean_avgs']
33: {'a': {'2000': 0.8787229782935382,
'2005': 0.7731898337825897,
'2010': 0.6570021678354201,
'2015': 0.4855844036401784},
'f1': {'2000': 0.8569268973160075,
'2005': 0.7261412495485622,
'2010': 0.5346961933452483,
'2015': 0.3101632343369664},
'p': {'2000': 0.8336916875707331,
'2005': 0.6419238936352474,
'2010': 0.433459710994768,
'2015': 0.23657168767338047},
'r': {'2000': 0.8979442718539291,
'2005': 0.8870605643639801,
'2010': 0.8046686694377776,
'2015': 0.7751092452808405}}
>>> eatiht_res = pickle.load(open('c:/crawlToTheFuture/crawl-to-the-future/testing/eatiht_praf_output.pkl','rb'))
Traceback (most recent call last):
File "<pyshell#117>", line 1, in <module>
eatiht_res = pickle.load(open('c:/crawlToTheFuture/crawl-to-the-future/testing/eatiht_praf_output.pkl','rb'))
File "c:\python27\lib\pickle.py", line 1378, in load
return Unpickler(file).load()
File "c:\python27\lib\pickle.py", line 858, in load
dispatch[key](self)
File "c:\python27\lib\pickle.py", line 886, in load_proto
raise ValueError, "unsupported pickle protocol: %d" % proto
ValueError: unsupported pickle protocol: 3
>>> eatiht_res = pickle.load(open('c:/crawlToTheFuture/crawl-to-the-future/testing/eatiht_praf_output.pkl','rb'),protocol=3)
Traceback (most recent call last):
File "<pyshell#118>", line 1, in <module>
eatiht_res = pickle.load(open('c:/crawlToTheFuture/crawl-to-the-future/testing/eatiht_praf_output.pkl','rb'),protocol=3)
TypeError: load() got an unexpected keyword argument 'protocol'
>>> extractor_avgs['ContentCodeBlurringFilter']['mean_avgs']
34: {'a': {'2000': 0.8368367832926638,
'2005': 0.7351109053853003,
'2010': 0.5927803833155777,
'2015': 0.4425254639394451},
'f1': {'2000': 0.8161290619784449,
'2005': 0.6880590975573231,
'2010': 0.4912825184372468,
'2015': 0.28839690093305104},
'p': {'2000': 0.743924604803316,
'2005': 0.5767766993981119,
'2010': 0.3694926280178968,
'2015': 0.20266619793602308},
'r': {'2000': 0.9291353033551015,
'2005': 0.913554997090962,
'2010': 0.8674373568798217,
'2015': 0.8559149321962897}}
>>> extractor_avgs['BodyTextExtractorFilter']['mean_avgs']
35: {'a': {'2000': 0.8468133612777893,
'2005': 0.722211307884621,
'2010': 0.5392067689910991,
'2015': 0.4405326613975287},
'f1': {'2000': 0.8273605547788072,
'2005': 0.6607190392672773,
'2010': 0.467828867027195,
'2015': 0.29048280892761486},
'p': {'2000': 0.7636034219896521,
'2005': 0.5815370256792615,
'2010': 0.34315115063483853,
'2015': 0.206725128853794},
'r': {'2000': 0.92738624270083,
'2005': 0.8937158090491095,
'2010': 0.8866616148496138,
'2015': 0.8546900224245262}}
>>> extractor_avgs['FeatureExtractorDomFilter']['mean_avgs']
36: {'a': {'2000': 0.560397946753548,
'2005': 0.5592510304750151,
'2010': 0.5948990253026867,
'2015': 0.4181544545709329},
'f1': {'2000': 0.14891421984513717,
'2005': 0.20178745902113784,
'2010': 0.1086895755071982,
'2015': 0.06508070111775842},
'p': {'2000': 0.442962673341292,
'2005': 0.3041440953792285,
'2010': 0.13414507214531224,
'2015': 0.07358795028287761},
'r': {'2000': 0.10838708049724577,
'2005': 0.21832921624056137,
'2010': 0.14124080539759648,
'2015': 0.2005470912383458}}
>>> extractor_avgs['BodyTextExtractor2Filter']['mean_avgs']
37: {'a': {'2000': 0.8787229782935382,
'2005': 0.7731898337825897,
'2010': 0.6570021678354201,
'2015': 0.4855844036401784},
'f1': {'2000': 0.8569268973160075,
'2005': 0.7261412495485622,
'2010': 0.5346961933452483,
'2015': 0.3101632343369664},
'p': {'2000': 0.8336916875707331,
'2005': 0.6419238936352474,
'2010': 0.433459710994768,
'2015': 0.23657168767338047},
'r': {'2000': 0.8979442718539291,
'2005': 0.8870605643639801,
'2010': 0.8046686694377776,
'2015': 0.7751092452808405}}
>>> extractor_avgs['DocumentSlopeCurveFilter']['mean_avgs']
38: {'a': {'2000': 0.8620180090195776,
'2005': 0.7754814660397891,
'2010': 0.7108903932715295,
'2015': 0.5014506029986247},
'f1': {'2000': 0.8329640627805608,
'2005': 0.7167392566452404,
'2010': 0.5506111817421068,
'2015': 0.3075953957674911},
'p': {'2000': 0.8542314490907603,
'2005': 0.6688361014914828,
'2010': 0.4637099560697746,
'2015': 0.23885825634139532},
'r': {'2000': 0.8324657568143268,
'2005': 0.8297705646868128,
'2010': 0.7574176209204273,
'2015': 0.7297434109799334}}
>>> extractor_avgs['FeatureExtractorSplitFilter']['mean_avgs']
39: {'a': {'2000': 0.5559238594224393,
'2005': 0.4879954275839581,
'2010': 0.5645208860403892,
'2015': 0.41981407729470455},
'f1': {'2000': 0.11396020483828136,
'2005': 0.04810353763544852,
'2010': 0.0775670609790663,
'2015': 0.0678936739541488},
'p': {'2000': 0.22476359576715482,
'2005': 0.11407854065631107,
'2010': 0.08507094603309076,
'2015': 0.07964855567605174},
'r': {'2000': 0.11865467203079069,
'2005': 0.04724404357604244,
'2010': 0.10891001415969159,
'2015': 0.20217775071023286}}
>>> extractor_avgs['GeneralCCB']['mean_avgs']
40: {'a': {'2000': 0.882594960601493,
'2005': 0.7778229673359103,
'2010': 0.6894664144968823,
'2015': 0.4872304061261376},
'f1': {'2000': 0.8548624630356878,
'2005': 0.7206340797549216,
'2010': 0.5379077540676398,
'2015': 0.30035712288100086},
'p': {'2000': 0.8528247683692562,
'2005': 0.6579203856926136,
'2010': 0.44453916317817554,
'2015': 0.22922331754635766},
'r': {'2000': 0.8694851156869386,
'2005': 0.8490569572783155,
'2010': 0.7706439750796462,
'2015': 0.7442655167149075}}
>>> extractor_avgs['KFeatureExtractorDomFilter']['mean_avgs']
41: {'a': {'2000': 0.8334122992577825,
'2005': 0.707051587591644,
'2010': 0.6540221466893419,
'2015': 0.4771913630820097},
'f1': {'2000': 0.7386039492483721,
'2005': 0.5601644344740914,
'2010': 0.427488522913482,
'2015': 0.25671490904788163},
'p': {'2000': 0.7421451614467723,
'2005': 0.5000401393317233,
'2010': 0.3527928752167519,
'2015': 0.19779555506859037},
'r': {'2000': 0.7587784544963219,
'2005': 0.6995459900661355,
'2010': 0.634470878713714,
'2015': 0.649194066536902}}
>>> extractor_avgs['LinkQuotaFilter']['mean_avgs']
42: {'a': {'2000': 0.8149456822704948,
'2005': 0.7222741349443401,
'2010': 0.6193535122708822,
'2015': 0.4524167472934086},
'f1': {'2000': 0.7986246924128585,
'2005': 0.6792511953919993,
'2010': 0.5070612441119073,
'2015': 0.29353269609455346},
'p': {'2000': 0.7111436745441546,
'2005': 0.5656825448584895,
'2010': 0.38676331191435775,
'2015': 0.20907927270316295},
'r': {'2000': 0.9381195610997212,
'2005': 0.9239005140501625,
'2010': 0.8544225829686752,
'2015': 0.8446935375364951}}
>>>